GopherProxy

	Apply clang-format - libgrapheme - unicode string library
	git clone git://git.suckless.org/libgrapheme
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit abdc2ba0c764c527aaa2ed9fe42db27d71a10bc2
	parent 50efb9a3396588e6e1266f51ec5446a9fa8013ea
	Author: Laslo Hunhold <[email protected]>
	Date: Tue, 15 Nov 2022 15:53:56 +0100

	Apply clang-format

	Even though this disrupts the backtrackability of the code a bit,
	it's better to rip the band aid off now than to push it on into the
	future.

	With these changes, formatting is automatically governed and ensured by
	a simple call to

	make format

	Signed-off-by: Laslo Hunhold <[email protected]>

	Diffstat:
	M benchmark/bidirectional.c \| 2 +-
	M benchmark/case.c \| 5 +++--
	M benchmark/character.c \| 12 ++++++------
	M benchmark/line.c \| 4 ++--
	M benchmark/sentence.c \| 7 ++++---
	M benchmark/utf8-decode.c \| 24 +++++++++++-------------
	M benchmark/util.c \| 25 ++++++++++++-------------
	M benchmark/util.h \| 8 ++++----
	M benchmark/word.c \| 4 ++--
	M gen/bidirectional-test.c \| 150 +++++++++++++++++++----------…
	M gen/bidirectional.c \| 144 ++++++++++++++++-------------…
	M gen/case.c \| 79 ++++++++++++++++++-----------…
	M gen/character.c \| 64 ++++++++++++++++-------------…
	M gen/line.c \| 343 +++++++++++++++++------------…
	M gen/sentence.c \| 66 ++++++++++++++++-------------…
	M gen/util.c \| 202 +++++++++++++++++------------…
	M gen/util.h \| 39 ++++++++++++++++-------------…
	M gen/word.c \| 97 ++++++++++++++++-------------…
	M grapheme.h \| 24 ++++++++++++++----------
	M src/bidirectional.c \| 323 +++++++++++++++++++----------…
	M src/case.c \| 125 ++++++++++++++++++-----------…
	M src/character.c \| 160 ++++++++++++++++-------------…
	M src/line.c \| 108 +++++++++++++++++------------…
	M src/sentence.c \| 44 ++++++++++++++++-------------…
	M src/utf8.c \| 26 +++++++++++++-------------
	M src/util.c \| 51 ++++++++++++++++++-----------…
	M src/util.h \| 26 ++++++++++++++------------
	M src/word.c \| 95 +++++++++++++++++------------…
	M test/bidirectional.c \| 20 +++++++++++++-------
	M test/case.c \| 331 +++++++++++++++++++----------…
	M test/character.c \| 19 +++++++++----------
	M test/line.c \| 21 ++++++++-------------
	M test/sentence.c \| 23 ++++++++++-------------
	M test/utf8-decode.c \| 344 +++++++++++++++--------------…
	M test/utf8-encode.c \| 39 ++++++++++++++++-------------…
	M test/util.c \| 44 +++++++++++++++++++----------…
	M test/util.h \| 15 +++++++++++----
	M test/word.c \| 16 ++++++----------

	38 files changed, 1736 insertions(+), 1393 deletions(-)
	---
	diff --git a/benchmark/bidirectional.c b/benchmark/bidirectional.c
	@@ -5,8 +5,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/bidirectional-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#define NUM_ITERATIONS 100000
	diff --git a/benchmark/case.c b/benchmark/case.c
	@@ -6,8 +6,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/word-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#define NUM_ITERATIONS 10000
	@@ -40,7 +40,8 @@ main(int argc, char *argv[])
	&(p.srclen))) == NULL) {
	return 1;
	}
	- if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) =…
	+ if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) ==
	+ NULL) {
	fprintf(stderr, "calloc: Out of memory\n");
	}

	diff --git a/benchmark/character.c b/benchmark/character.c
	@@ -6,8 +6,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/character-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#include <utf8proc.h>
	@@ -28,7 +28,7 @@ libgrapheme(const void *payload)
	size_t i;

	for (i = 0; i + 1 < p->buflen; i++) {
	- (void)grapheme_is_character_break(p->buf[i], p->buf[i+1],
	+ (void)grapheme_is_character_break(p->buf[i], p->buf[i + 1],
	&state);
	}
	}
	@@ -41,9 +41,8 @@ libutf8proc(const void *payload)
	size_t i;

	for (i = 0; i + 1 < p->buflen; i++) {
	- (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i],
	- p->buf_utf8proc[i+1],
	- &state);
	+ (void)utf8proc_grapheme_break_stateful(
	+ p->buf_utf8proc[i], p->buf_utf8proc[i + 1], &state);
	}
	}

	@@ -61,7 +60,8 @@ main(int argc, char *argv[])
	&(p.buflen))) == NULL) {
	return 1;
	}
	- if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) ==…
	+ if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) ==
	+ NULL) {
	fprintf(stderr, "malloc: %s\n", strerror(errno));
	exit(1);
	}
	diff --git a/benchmark/line.c b/benchmark/line.c
	@@ -6,8 +6,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/line-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#define NUM_ITERATIONS 10000
	@@ -23,7 +23,7 @@ libgrapheme(const void *payload)
	const struct break_benchmark_payload *p = payload;
	size_t off;

	- for (off = 0; off < p->buflen; ) {
	+ for (off = 0; off < p->buflen;) {
	off += grapheme_next_line_break(p->buf + off, p->buflen - off);
	}
	}
	diff --git a/benchmark/sentence.c b/benchmark/sentence.c
	@@ -6,8 +6,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/sentence-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#define NUM_ITERATIONS 100000
	@@ -23,8 +23,9 @@ libgrapheme(const void *payload)
	const struct break_benchmark_payload *p = payload;
	size_t off;

	- for (off = 0; off < p->buflen; ) {
	- off += grapheme_next_sentence_break(p->buf + off, p->buflen - …
	+ for (off = 0; off < p->buflen;) {
	+ off += grapheme_next_sentence_break(p->buf + off,
	+ p->buflen - off);
	}
	}

	diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
	@@ -6,8 +6,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/character-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#include <utf8proc.h>
	@@ -28,9 +28,8 @@ libgrapheme(const void *payload)
	size_t ret, off;

	for (off = 0; off < p->buflen; off += ret) {
	- if ((ret = grapheme_decode_utf8(p->buf + off,
	- p->buflen - off, &cp)) >
	- (p->buflen - off)) {
	+ if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off,
	+ &cp)) > (p->buflen - off)) {
	break;
	}
	(void)cp;
	@@ -48,7 +47,7 @@ libutf8proc(const void *payload)
	for (off = 0; off < p->buflen; off += (size_t)ret) {
	if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
	(utf8proc_ssize_t)(p->buflen - off…
	- &cp)) < 0) {
	+ &cp)) < 0) {
	break;
	}
	(void)cp;
	@@ -64,9 +63,8 @@ main(int argc, char *argv[])

	(void)argc;

	- p.buf = generate_utf8_test_buffer(character_break_test,
	- LEN(character_break_test),
	- &(p.buflen));
	+ p.buf = generate_utf8_test_buffer(
	+ character_break_test, LEN(character_break_test), &(p.buflen));

	/* convert cp-buffer to stupid custom libutf8proc-uint8-type */
	if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
	@@ -74,7 +72,7 @@ main(int argc, char *argv[])
	exit(1);
	}
	for (i = 0; i < p.buflen; i++) {
	- /*
	+ /*
	* even if char is larger than 8 bit, it will only have
	* any of the first 8 bits set (by construction).
	*/
	@@ -82,11 +80,11 @@ main(int argc, char *argv[])
	}

	printf("%s\n", argv[0]);
	- run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
	- "byte", &baseline, NUM_ITERATIONS, p.buflen);
	+ run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline,
	+ NUM_ITERATIONS, p.buflen);
	run_benchmark(libutf8proc, &p, "libutf8proc ",
	- "but unsafe (does not detect overlong encodings)",
	- "byte", &baseline, NUM_ITERATIONS, p.buflen);
	+ "but unsafe (does not detect overlong encodings)", "byte…
	+ &baseline, NUM_ITERATIONS, p.buflen);

	free(p.buf);
	free(p.buf_utf8proc);
	diff --git a/benchmark/util.c b/benchmark/util.c
	@@ -1,7 +1,7 @@
	/* See LICENSE file for copyright and license details. */
	#include <math.h>
	-#include <stdlib.h>
	#include <stdio.h>
	+#include <stdlib.h>
	#include <time.h>

	#include "../gen/types.h"
	@@ -20,7 +20,8 @@ generate_cp_test_buffer(const struct break_test *test, size_t…
	*buflen += test[i].cplen;
	}
	if (!(buf = calloc(*buflen, sizeof(*buf)))) {
	- fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\…
	+ fprintf(stderr,
	+ "generate_test_buffer: calloc: Out of memory.\n");
	exit(1);
	}
	for (i = 0, off = 0; i < testlen; i++) {
	@@ -48,18 +49,18 @@ generate_utf8_test_buffer(const struct break_test *test, si…
	}
	(*buflen)++; /* terminating NUL-byte */
	if (!(buf = malloc(*buflen))) {
	- fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\…
	+ fprintf(stderr,
	+ "generate_test_buffer: malloc: Out of memory.\n");
	exit(1);
	}
	for (i = 0, off = 0; i < testlen; i++) {
	for (j = 0; j < test[i].cplen; j++, off += ret) {
	- if ((ret = grapheme_encode_utf8(test[i].cp[j],
	- buf + off,
	- *buflen - off)) >
	+ if ((ret = grapheme_encode_utf8(
	+ test[i].cp[j], buf + off, *buflen - off))…
	(*buflen - off)) {
	/* shouldn't happen */
	fprintf(stderr, "generate_utf8_test_buffer: "
	- "Buffer too small.\n");
	+ "Buffer too small.\n");
	exit(1);
	}
	}
	@@ -77,10 +78,9 @@ time_diff(struct timespec a, struct timespec b)
	}

	void
	-run_benchmark(void (*func)(const void *), const void *payload,
	- const char *name, const char *comment, const char *unit,
	- double *baseline, size_t num_iterations,
	- size_t units_per_iteration)
	+run_benchmark(void (*func)(const void *), const void *payload, const char *nam…
	+ const char *comment, const char *unit, double *baseline,
	+ size_t num_iterations, size_t units_per_iteration)
	{
	struct timespec start, end;
	size_t i;
	@@ -109,7 +109,6 @@ run_benchmark(void (func)(const void ), const void *paylo…
	printf(" avg. %.3es/%s (%.2f%% %s%s%s)\n", diff, unit,
	fabs(1.0 - diff / *baseline) * 100,
	(diff < *baseline) ? "faster" : "slower",
	- comment ? ", " : "",
	- comment ? comment : "");
	+ comment ? ", " : "", comment ? comment : "");
	}
	}
	diff --git a/benchmark/util.h b/benchmark/util.h
	@@ -7,10 +7,10 @@
	#define LEN(x) (sizeof(x) / sizeof(*(x)))

	#ifdef __has_attribute
	- #if __has_attribute(optnone)
	- void libgrapheme(const void *) __attribute__((optnone));
	- void libutf8proc(const void *) __attribute__((optnone));
	- #endif
	+#if __has_attribute(optnone)
	+void libgrapheme(const void *) __attribute__((optnone));
	+void libutf8proc(const void *) __attribute__((optnone));
	+#endif
	#endif

	uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t,
	diff --git a/benchmark/word.c b/benchmark/word.c
	@@ -6,8 +6,8 @@
	#include <stdlib.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/word-test.h"
	+#include "../grapheme.h"
	#include "util.h"

	#define NUM_ITERATIONS 10000
	@@ -23,7 +23,7 @@ libgrapheme(const void *payload)
	const struct break_benchmark_payload *p = payload;
	size_t off;

	- for (off = 0; off < p->buflen; ) {
	+ for (off = 0; off < p->buflen;) {
	off += grapheme_next_word_break(p->buf + off, p->buflen - off);
	}
	}
	diff --git a/gen/bidirectional-test.c b/gen/bidirectional-test.c
	@@ -3,8 +3,8 @@
	#include <inttypes.h>
	#include <stddef.h>
	#include <stdio.h>
	-#include <string.h>
	#include <stdlib.h>
	+#include <string.h>

	#include "../grapheme.h"
	#include "util.h"
	@@ -23,29 +23,29 @@ static const struct {
	const char *class;
	const uint_least32_t cp;
	} classcpmap[] = {
	- { .class = "L", .cp = UINT32_C(0x0041) },
	- { .class = "AL", .cp = UINT32_C(0x0608) },
	- { .class = "AN", .cp = UINT32_C(0x0600) },
	- { .class = "B", .cp = UINT32_C(0x000A) },
	- { .class = "BN", .cp = UINT32_C(0x0000) },
	- { .class = "CS", .cp = UINT32_C(0x002C) },
	- { .class = "EN", .cp = UINT32_C(0x0030) },
	- { .class = "ES", .cp = UINT32_C(0x002B) },
	- { .class = "ET", .cp = UINT32_C(0x0023) },
	+ { .class = "L", .cp = UINT32_C(0x0041) },
	+ { .class = "AL", .cp = UINT32_C(0x0608) },
	+ { .class = "AN", .cp = UINT32_C(0x0600) },
	+ { .class = "B", .cp = UINT32_C(0x000A) },
	+ { .class = "BN", .cp = UINT32_C(0x0000) },
	+ { .class = "CS", .cp = UINT32_C(0x002C) },
	+ { .class = "EN", .cp = UINT32_C(0x0030) },
	+ { .class = "ES", .cp = UINT32_C(0x002B) },
	+ { .class = "ET", .cp = UINT32_C(0x0023) },
	{ .class = "FSI", .cp = UINT32_C(0x2068) },
	{ .class = "LRE", .cp = UINT32_C(0x202A) },
	{ .class = "LRI", .cp = UINT32_C(0x2066) },
	{ .class = "LRO", .cp = UINT32_C(0x202D) },
	{ .class = "NSM", .cp = UINT32_C(0x0300) },
	- { .class = "ON", .cp = UINT32_C(0x0021) },
	+ { .class = "ON", .cp = UINT32_C(0x0021) },
	{ .class = "PDF", .cp = UINT32_C(0x202C) },
	{ .class = "PDI", .cp = UINT32_C(0x2069) },
	- { .class = "R", .cp = UINT32_C(0x05BE) },
	+ { .class = "R", .cp = UINT32_C(0x05BE) },
	{ .class = "RLE", .cp = UINT32_C(0x202B) },
	{ .class = "RLI", .cp = UINT32_C(0x2067) },
	{ .class = "RLO", .cp = UINT32_C(0x202E) },
	- { .class = "S", .cp = UINT32_C(0x0009) },
	- { .class = "WS", .cp = UINT32_C(0x000C) },
	+ { .class = "S", .cp = UINT32_C(0x0009) },
	+ { .class = "WS", .cp = UINT32_C(0x000C) },
	};

	static int
	@@ -59,7 +59,8 @@ classtocp(const char str, size_t len, uint_least32_t cp)
	return 0;
	}
	}
	- fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,…
	+ fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
	+ str);

	return 1;
	}
	@@ -77,8 +78,10 @@ parse_class_list(const char str, uint_least32_t *cp, size_…
	}

	/* count the number of spaces in the string and infer list length */
	- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+…
	+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
	+ count++, tmp1 = tmp2 + 1) {
	;
	+ }

	/* allocate resources */
	if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
	@@ -89,7 +92,8 @@ parse_class_list(const char str, uint_least32_t *cp, size_t…
	/* go through the string again, parsing the classes */
	for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
	tmp2 = strchr(tmp1, ' ');
	- if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1…
	+ if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1…
	+ &((*cp)[i]))) {
	return 1;
	}
	if (tmp2 != NULL) {
	@@ -135,12 +139,10 @@ strtolevel(const char str, size_t len, int_least8_t lev…
	if (str[0] != '1') {
	goto toolarge;
	}
	- *level = (str[0] - '0') * 100 +
	- (str[1] - '0') * 10 +
	- (str[2] - '0');
	+ *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 +
	+ (str[2] - '0');
	} else if (len == 2) {
	- *level = (str[0] - '0') * 10 +
	- (str[1] - '0');
	+ *level = (str[0] - '0') * 10 + (str[1] - '0');
	} else if (len == 1) {
	*level = (str[0] - '0');
	} else { /* len == 0 */
	@@ -149,8 +151,7 @@ strtolevel(const char str, size_t len, int_least8_t level)

	return 0;
	toolarge:
	- fprintf(stderr, "hextocp: '%.*s' is too large.\n",
	- (int)len, str);
	+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str);
	return 1;
	}

	@@ -167,8 +168,10 @@ parse_level_list(const char str, int_least8_t *level, si…
	}

	/* count the number of spaces in the string and infer list length */
	- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+…
	+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
	+ count++, tmp1 = tmp2 + 1) {
	;
	+ }

	/* allocate resources */
	if (!(*level = calloc((*levellen = count), sizeof(**level)))) {
	@@ -179,7 +182,9 @@ parse_level_list(const char str, int_least8_t *level, siz…
	/* go through the string again, parsing the levels */
	for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
	tmp2 = strchr(tmp1, ' ');
	- if (strtolevel(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp…
	+ if (strtolevel(tmp1,
	+ tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
	+ &((*level)[i]))) {
	return 1;
	}
	if (tmp2 != NULL) {
	@@ -199,7 +204,8 @@ bidirectional_test_list_print(const struct bidirectional_te…

	printf("/* Automatically generated by %s */\n"
	"#include <stdint.h>\n#include <stddef.h>\n\n"
	- "#include \"../grapheme.h\"\n\n", progname);
	+ "#include \"../grapheme.h\"\n\n",
	+ progname);

	printf("static const struct {\n"
	"\tuint_least32_t *cp;\n"
	@@ -208,7 +214,8 @@ bidirectional_test_list_print(const struct bidirectional_te…
	"\tsize_t modelen;\n"
	"\tint_least8_t *level;\n"
	"\tint_least8_t *reorder;\n"
	- "\tsize_t reorderlen;\n} %s[] = {\n", identifier);
	+ "\tsize_t reorderlen;\n} %s[] = {\n",
	+ identifier);
	for (i = 0; i < testlen; i++) {
	printf("\t{\n");

	@@ -222,11 +229,13 @@ bidirectional_test_list_print(const struct bidirectional_…
	printf(" },\n");
	printf("\t\t.cplen = %zu,\n", test[i].cplen);

	- printf("\t\t.mode = (enum grapheme_bidirectional_overrid…
	+ printf("\t\t.mode = (enum "
	+ "grapheme_bidirectional_override[]){");
	for (j = 0; j < test[i].modelen; j++) {
	if (test[i].mode[j] ==
	GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL) {
	- printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTR…
	+ printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_"
	+ "NEUTRAL");
	} else if (test[i].mode[j] ==
	GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
	printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR");
	@@ -279,8 +288,8 @@ static int_least8_t *current_reorder;
	static size_t current_reorder_len;

	static int
	-test_callback(const char *file, char **field, size_t nfields,
	- char *comment, void *payload)
	+test_callback(const char *file, char **field, size_t nfields, char *comment,
	+ void *payload)
	{
	char *tmp;

	@@ -292,23 +301,31 @@ test_callback(const char file, char *field, size_t nfie…
	if (nfields > 0 && field[0][0] == '@') {
	if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) {
	tmp = field[0] + sizeof("@Levels:") - 1;
	- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); …
	+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
	+ tmp++) {
	;
	+ }
	free(current_level);
	- parse_level_list(tmp, &current_level, &current_level_l…
	- } else if (!strncmp(field[0], "@Reorder:", sizeof("@Reorder:")…
	+ parse_level_list(tmp, &current_level,
	+ &current_level_len);
	+ } else if (!strncmp(field[0],
	+ "@Reorder:", sizeof("@Reorder:") - 1)) {
	tmp = field[0] + sizeof("@Reorder:") - 1;
	- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); …
	+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
	+ tmp++) {
	;
	+ }
	free(current_reorder);
	- parse_level_list(tmp, &current_reorder, &current_reord…
	+ parse_level_list(tmp, &current_reorder,
	+ &current_reorder_len);
	} else {
	fprintf(stderr, "Unknown @-input-line.\n");
	exit(1);
	}
	} else {
	if (nfields < 2) {
	- /* discard any line that does not have at least 2 fiel…
	+ /* discard any line that does not have at least 2 fiel…
	+ */
	return 0;
	}

	@@ -321,26 +338,33 @@ test_callback(const char file, char *field, size_t nfie…
	/* parse field data */
	parse_class_list(field[0], &(test[testlen - 1].cp),
	&(test[testlen - 1].cplen));
	-
	+
	/* copy current level- and reorder-arrays */
	- if (!(test[testlen - 1].level = calloc(current_level_len, size…
	+ if (!(test[testlen - 1].level =
	+ calloc(current_level_len,
	+ sizeof(*(test[testlen - 1].level))))) {
	fprintf(stderr, "calloc: %s\n", strerror(errno));
	exit(1);
	}
	- memcpy(test[testlen - 1].level, current_level, current_level_l…
	+ memcpy(test[testlen - 1].level, current_level,
	+ current_level_len * sizeof(*(test[testlen - 1].level)));

	- if (!(test[testlen - 1].reorder = calloc(current_reorder_len, …
	+ if (!(test[testlen - 1].reorder =
	+ calloc(current_reorder_len,
	+ sizeof(*(test[testlen - 1].reorder))))) {
	fprintf(stderr, "calloc: %s\n", strerror(errno));
	exit(1);
	}
	if (current_reorder != NULL) {
	memcpy(test[testlen - 1].reorder, current_reorder,
	- current_reorder_len * sizeof(*(test[testlen - 1…
	+ current_reorder_len *
	+ sizeof(*(test[testlen - 1].reorder)));
	}
	test[testlen - 1].reorderlen = current_reorder_len;
	-
	+
	if (current_level_len != test[testlen - 1].cplen) {
	- fprintf(stderr, "mismatch between string and level len…
	+ fprintf(stderr,
	+ "mismatch between string and level lengths.\n"…
	exit(1);
	}

	@@ -349,27 +373,38 @@ test_callback(const char file, char *field, size_t nfie…
	fprintf(stderr, "malformed paragraph-level-bitset.\n");
	exit(1);
	} else if (field[1][0] == '2') {
	- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
	+ test[testlen - 1].mode[0] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
	test[testlen - 1].modelen = 1;
	} else if (field[1][0] == '3') {
	/* auto=0 and LTR=1 */
	- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
	- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE…
	+ test[testlen - 1].mode[0] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
	+ test[testlen - 1].mode[1] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
	test[testlen - 1].modelen = 2;
	} else if (field[1][0] == '4') {
	- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
	+ test[testlen - 1].mode[0] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
	test[testlen - 1].modelen = 1;
	- } else if (field[1][0] == '5') {
	- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
	- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE…
	+ } else if (field[1][0] == '5') {
	+ test[testlen - 1].mode[0] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
	+ test[testlen - 1].mode[1] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
	test[testlen - 1].modelen = 2;
	} else if (field[1][0] == '7') {
	- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVE…
	- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVE…
	- test[testlen - 1].mode[2] = GRAPHEME_BIDIRECTIONAL_OVE…
	+ test[testlen - 1].mode[0] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
	+ test[testlen - 1].mode[1] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
	+ test[testlen - 1].mode[2] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
	test[testlen - 1].modelen = 3;
	} else {
	- fprintf(stderr, "unhandled paragraph-level-bitset %s.\…
	+ fprintf(stderr,
	+ "unhandled paragraph-level-bitset %s.\n",
	+ field[1]);
	exit(1);
	}
	}
	@@ -414,7 +449,8 @@ character_test_callback(const char file, char *field, siz…
	} else if (field[1][0] == '1') {
	test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RT…
	} else if (field[1][0] == '2') {
	- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NE…
	+ test[testlen - 1].mode[0] =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
	} else {
	fprintf(stderr, "unhandled paragraph-level-setting.\n");
	exit(1);
	diff --git a/gen/bidirectional.c b/gen/bidirectional.c
	@@ -15,118 +15,118 @@ static const struct property_spec bidi_property[] = {
	{
	/* default */
	.enumname = "L",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "L",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "L",
	},
	{
	.enumname = "AL",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "AL",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "AL",
	},
	{
	.enumname = "AN",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "AN",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "AN",
	},
	{
	.enumname = "B",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "B",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "B",
	},
	{
	.enumname = "BN",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "BN",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "BN",
	},
	{
	.enumname = "CS",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "CS",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "CS",
	},
	{
	.enumname = "EN",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "EN",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "EN",
	},
	{
	.enumname = "ES",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "ES",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "ES",
	},
	{
	.enumname = "ET",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "ET",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "ET",
	},
	{
	.enumname = "FSI",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "FSI",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "FSI",
	},
	{
	.enumname = "LRE",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "LRE",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "LRE",
	},
	{
	.enumname = "LRI",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "LRI",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "LRI",
	},
	{
	.enumname = "LRO",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "LRO",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "LRO",
	},
	{
	.enumname = "NSM",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "NSM",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "NSM",
	},
	{
	.enumname = "ON",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "ON",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "ON",
	},
	{
	.enumname = "PDF",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "PDF",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "PDF",
	},
	{
	.enumname = "PDI",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "PDI",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "PDI",
	},
	{
	.enumname = "R",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "R",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "R",
	},
	{
	.enumname = "RLE",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "RLE",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "RLE",
	},
	{
	.enumname = "RLI",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "RLI",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "RLI",
	},
	{
	.enumname = "RLO",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "RLO",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "RLO",
	},
	{
	.enumname = "S",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "S",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "S",
	},
	{
	.enumname = "WS",
	- .file = FILE_BIDI_CLASS,
	- .ucdname = "WS",
	+ .file = FILE_BIDI_CLASS,
	+ .ucdname = "WS",
	},
	};

	@@ -135,11 +135,12 @@ static struct {
	uint_least32_t cp_pair;
	char type;
	} *b = NULL;
	+
	static size_t blen;

	static int
	-bracket_callback(const char *file, char **field, size_t nfields,
	- char *comment, void *payload)
	+bracket_callback(const char *file, char **field, size_t nfields, char *comment,
	+ void *payload)
	{
	(void)file;
	(void)comment;
	@@ -189,11 +190,12 @@ post_process(struct properties *prop)
	}

	static uint_least8_t
	-fill_missing(uint_least32_t cp) {
	+fill_missing(uint_least32_t cp)
	+{
	/* based on the @missing-properties in data/DerivedBidiClass.txt */
	- if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) \|\|
	- (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) \|\|
	- (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) \|\|
	+ if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) \|\|
	+ (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) \|\|
	+ (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) \|\|
	(cp >= UINT32_C(0x10800) && cp <= UINT32_C(0x10CFF)) \|\|
	(cp >= UINT32_C(0x10D40) && cp <= UINT32_C(0x10EBF)) \|\|
	(cp >= UINT32_C(0x10F00) && cp <= UINT32_C(0x10F2F)) \|\|
	@@ -203,22 +205,22 @@ fill_missing(uint_least32_t cp) {
	(cp >= UINT32_C(0x1ED50) && cp <= UINT32_C(0x1EDFF)) \|\|
	(cp >= UINT32_C(0x1EF00) && cp <= UINT32_C(0x1EFFF))) {
	return 17; /* class R */
	- } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) \|\|
	- (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) \|\|
	- (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) \|\|
	- (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) \|\|
	- (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) \|\|
	+ } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) \|\|
	+ (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) \|\|
	+ (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) \|\|
	+ (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) \|\|
	+ (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) \|\|
	(cp >= UINT32_C(0x10D00) && cp <= UINT32_C(0x10D3F)) \|\|
	(cp >= UINT32_C(0x10EC0) && cp <= UINT32_C(0x10EFF)) \|\|
	- (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) \|\|
	+ (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) \|\|
	(cp >= UINT32_C(0x1EC70) && cp <= UINT32_C(0x1ECBF)) \|\|
	(cp >= UINT32_C(0x1ED00) && cp <= UINT32_C(0x1ED4F)) \|\|
	(cp >= UINT32_C(0x1EE00) && cp <= UINT32_C(0x1EEFF))) {
	- return 1; /* class AL */
	+ return 1; /* class AL */
	} else if (cp >= UINT32_C(0x20A0) && cp <= UINT32_C(0x20CF)) {
	- return 8; /* class ET */
	+ return 8; /* class ET */
	} else {
	- return 0; /* class L */
	+ return 0; /* class L */
	}
	}

	@@ -238,13 +240,11 @@ main(int argc, char *argv[])
	fprintf(stderr, "calloc: %s\n", strerror(errno));
	exit(1);
	}
	- parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback,
	- NULL);
	+ parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, NULL);

	- properties_generate_break_property(bidi_property,
	- LEN(bidi_property), fill_missing,
	- NULL, post_process, "bidi",
	- argv[0]);
	+ properties_generate_break_property(bidi_property, LEN(bidi_property),
	+ fill_missing, NULL, post_process,
	+ "bidi", argv[0]);

	printf("\nenum bracket_type {\n\tBIDI_BRACKET_NONE,\n\t"
	"BIDI_BRACKET_OPEN,\n\tBIDI_BRACKET_CLOSE,\n};\n\n"
	@@ -252,10 +252,12 @@ main(int argc, char *argv[])
	"\tuint_least32_t pair;\n};\n\n"
	"static const struct bracket bidi_bracket[] = {\n");
	for (i = 0; i < blen; i++) {
	- printf("\t{\n\t\t.type = %s,\n\t\t.pair = UINT32_C(0x%06X),\n\…
	- (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
	- (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" : "BIDI_BRACKET_NONE…
	- b[i].cp_pair);
	+ printf("\t{\n\t\t.type = %s,\n\t\t.pair = "
	+ "UINT32_C(0x%06X),\n\t},\n",
	+ (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
	+ (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" :
	+ "BIDI_BRACKET_NONE",
	+ b[i].cp_pair);
	}
	printf("};\n");

	diff --git a/gen/case.c b/gen/case.c
	@@ -12,28 +12,28 @@
	static const struct property_spec case_property[] = {
	{
	.enumname = "OTHER",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "BOTH_CASED_CASE_IGNORABLE",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	- {
	+ {
	.enumname = "CASED",
	- .file = FILE_DCP,
	- .ucdname = "Cased",
	+ .file = FILE_DCP,
	+ .ucdname = "Cased",
	},
	{
	.enumname = "CASE_IGNORABLE",
	- .file = FILE_DCP,
	- .ucdname = "Case_Ignorable",
	+ .file = FILE_DCP,
	+ .ucdname = "Case_Ignorable",
	},
	{
	.enumname = "UNCASED",
	- .file = FILE_DCP,
	- .ucdname = "Uncased",
	+ .file = FILE_DCP,
	+ .ucdname = "Uncased",
	},
	};

	@@ -67,12 +67,14 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uin…
	}

	static struct properties *prop_upper = NULL, *prop_lower, *prop_title;
	+
	static struct special_case {
	struct {
	uint_least32_t *cp;
	size_t cplen;
	} upper, lower, title;
	} *sc = NULL;
	+
	static size_t sclen = 0;

	static int
	@@ -89,9 +91,12 @@ unicodedata_callback(const char file, char *field, size_t …

	upper = lower = title = cp;

	- if ((strlen(field[12]) > 0 && hextocp(field[12], strlen(field[12]), &u…
	- (strlen(field[13]) > 0 && hextocp(field[13], strlen(field[13]), &l…
	- (nfields >= 15 && strlen(field[14]) > 0 && hextocp(field[14], strl…
	+ if ((strlen(field[12]) > 0 &&
	+ hextocp(field[12], strlen(field[12]), &upper)) \|\|
	+ (strlen(field[13]) > 0 &&
	+ hextocp(field[13], strlen(field[13]), &lower)) \|\|
	+ (nfields >= 15 && strlen(field[14]) > 0 &&
	+ hextocp(field[14], strlen(field[14]), &title))) {
	return 1;
	}

	@@ -126,7 +131,7 @@ specialcasing_callback(const char file, char *field, size…
	/* extend special case array */
	if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) {
	fprintf(stderr, "realloc: %s\n", strerror(errno));
	- exit(1);
	+ exit(1);
	}

	/* parse field data */
	@@ -142,9 +147,12 @@ specialcasing_callback(const char file, char *field, siz…
	* special value 0x110000 + (offset in special case array),
	* even if the special case has length 1
	*/
	- prop_upper[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen …
	- prop_lower[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen …
	- prop_title[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen …
	+ prop_upper[cp].property =
	+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
	+ prop_lower[cp].property =
	+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
	+ prop_title[cp].property =
	+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));

	return 0;
	}
	@@ -165,9 +173,8 @@ main(int argc, char *argv[])
	(void)argc;

	/* generate case property table from the specification */
	- properties_generate_break_property(case_property,
	- LEN(case_property), NULL,
	- handle_conflict, NULL, "case",
	+ properties_generate_break_property(case_property, LEN(case_property),
	+ NULL, handle_conflict, NULL, "case",
	argv[0]);

	/*
	@@ -186,38 +193,46 @@ main(int argc, char *argv[])
	}
	parse_file_with_callback("data/UnicodeData.txt", unicodedata_callback,
	NULL);
	- parse_file_with_callback("data/SpecialCasing.txt", specialcasing_callb…
	- NULL);
	+ parse_file_with_callback("data/SpecialCasing.txt",
	+ specialcasing_callback, NULL);

	/* compress properties */
	properties_compress(prop_upper, &comp_upper);
	properties_compress(prop_lower, &comp_lower);
	properties_compress(prop_title, &comp_title);

	- fprintf(stderr, "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%…
	+ fprintf(stderr,
	+ "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, "
	+ "title=%.2f%%\n",
	argv[0], properties_get_major_minor(&comp_upper, &mm_upper),
	properties_get_major_minor(&comp_lower, &mm_lower),
	properties_get_major_minor(&comp_title, &mm_title));

	/* print tables */
	- printf("/* Automatically generated by %s */\n#include <stdint.h>\n#inc…
	+ printf("/* Automatically generated by %s */\n#include "
	+ "<stdint.h>\n#include <stddef.h>\n\n",
	+ argv[0]);

	- printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t cplen;\…
	+ printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t "
	+ "cplen;\n};\n\n");

	properties_print_lookup_table("upper_major", mm_upper.major, 0x1100);
	printf("\n");
	- properties_print_derived_lookup_table("upper_minor", "int_least32_t", …
	- mm_upper.minorlen, get_value, co…
	+ properties_print_derived_lookup_table("upper_minor", "int_least32_t",
	+ mm_upper.minor, mm_upper.minorle…
	+ get_value, comp_upper.data);
	printf("\n");
	properties_print_lookup_table("lower_major", mm_lower.major, 0x1100);
	printf("\n");
	- properties_print_derived_lookup_table("lower_minor", "int_least32_t", …
	- mm_lower.minorlen, get_value, co…
	+ properties_print_derived_lookup_table("lower_minor", "int_least32_t",
	+ mm_lower.minor, mm_lower.minorle…
	+ get_value, comp_lower.data);
	printf("\n");
	properties_print_lookup_table("title_major", mm_title.major, 0x1100);
	printf("\n");
	- properties_print_derived_lookup_table("title_minor", "int_least32_t", …
	- mm_title.minorlen, get_value, co…
	+ properties_print_derived_lookup_table("title_minor", "int_least32_t",
	+ mm_title.minor, mm_title.minorle…
	+ get_value, comp_title.data);
	printf("\n");

	printf("static const struct special_case upper_special[] = {\n");
	diff --git a/gen/character.c b/gen/character.c
	@@ -9,78 +9,78 @@
	static const struct property_spec char_break_property[] = {
	{
	.enumname = "OTHER",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "CONTROL",
	- .file = FILE_GRAPHEME,
	- .ucdname = "Control",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "Control",
	},
	{
	.enumname = "CR",
	- .file = FILE_GRAPHEME,
	- .ucdname = "CR",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "CR",
	},
	{
	.enumname = "EXTEND",
	- .file = FILE_GRAPHEME,
	- .ucdname = "Extend",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "Extend",
	},
	{
	.enumname = "EXTENDED_PICTOGRAPHIC",
	- .file = FILE_EMOJI,
	- .ucdname = "Extended_Pictographic",
	+ .file = FILE_EMOJI,
	+ .ucdname = "Extended_Pictographic",
	},
	{
	.enumname = "HANGUL_L",
	- .file = FILE_GRAPHEME,
	- .ucdname = "L",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "L",
	},
	{
	.enumname = "HANGUL_V",
	- .file = FILE_GRAPHEME,
	- .ucdname = "V",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "V",
	},
	{
	.enumname = "HANGUL_T",
	- .file = FILE_GRAPHEME,
	- .ucdname = "T",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "T",
	},
	{
	.enumname = "HANGUL_LV",
	- .file = FILE_GRAPHEME,
	- .ucdname = "LV",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "LV",
	},
	{
	.enumname = "HANGUL_LVT",
	- .file = FILE_GRAPHEME,
	- .ucdname = "LVT",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "LVT",
	},
	{
	.enumname = "LF",
	- .file = FILE_GRAPHEME,
	- .ucdname = "LF",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "LF",
	},
	{
	.enumname = "PREPEND",
	- .file = FILE_GRAPHEME,
	- .ucdname = "Prepend",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "Prepend",
	},
	{
	.enumname = "REGIONAL_INDICATOR",
	- .file = FILE_GRAPHEME,
	- .ucdname = "Regional_Indicator",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "Regional_Indicator",
	},
	{
	.enumname = "SPACINGMARK",
	- .file = FILE_GRAPHEME,
	- .ucdname = "SpacingMark",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "SpacingMark",
	},
	{
	.enumname = "ZWJ",
	- .file = FILE_GRAPHEME,
	- .ucdname = "ZWJ",
	+ .file = FILE_GRAPHEME,
	+ .ucdname = "ZWJ",
	},
	};

	@@ -90,8 +90,8 @@ main(int argc, char *argv[])
	(void)argc;

	properties_generate_break_property(char_break_property,
	- LEN(char_break_property), NULL,
	- NULL, NULL, "char_break", argv[0]);
	+ LEN(char_break_property), NULL, NUL…
	+ NULL, "char_break", argv[0]);

	return 0;
	}
	diff --git a/gen/line.c b/gen/line.c
	@@ -12,8 +12,8 @@
	static const struct property_spec line_break_property[] = {
	{
	.enumname = "AL",
	- .file = FILE_LINE,
	- .ucdname = "AL",
	+ .file = FILE_LINE,
	+ .ucdname = "AL",
	},
	/*
	* Both extended pictographic and cn are large classes,
	@@ -32,269 +32,269 @@ static const struct property_spec line_break_property[] =…
	*/
	{
	.enumname = "TMP_CN",
	- .file = FILE_LINE,
	- .ucdname = "Cn",
	+ .file = FILE_LINE,
	+ .ucdname = "Cn",
	},
	{
	.enumname = "TMP_EXTENDED_PICTOGRAPHIC",
	- .file = FILE_EMOJI,
	- .ucdname = "Extended_Pictographic",
	+ .file = FILE_EMOJI,
	+ .ucdname = "Extended_Pictographic",
	},
	/* end of special block */
	{
	.enumname = "B2",
	- .file = FILE_LINE,
	- .ucdname = "B2",
	+ .file = FILE_LINE,
	+ .ucdname = "B2",
	},
	{
	.enumname = "BA",
	- .file = FILE_LINE,
	- .ucdname = "BA",
	+ .file = FILE_LINE,
	+ .ucdname = "BA",
	},
	{
	.enumname = "BB",
	- .file = FILE_LINE,
	- .ucdname = "BB",
	+ .file = FILE_LINE,
	+ .ucdname = "BB",
	},
	{
	.enumname = "BK",
	- .file = FILE_LINE,
	- .ucdname = "BK",
	+ .file = FILE_LINE,
	+ .ucdname = "BK",
	},
	{
	.enumname = "BOTH_CN_EXTPICT",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "CB",
	- .file = FILE_LINE,
	- .ucdname = "CB",
	+ .file = FILE_LINE,
	+ .ucdname = "CB",
	},
	{
	.enumname = "CL",
	- .file = FILE_LINE,
	- .ucdname = "CL",
	+ .file = FILE_LINE,
	+ .ucdname = "CL",
	},
	{
	.enumname = "CM",
	- .file = FILE_LINE,
	- .ucdname = "CM",
	+ .file = FILE_LINE,
	+ .ucdname = "CM",
	},
	{
	.enumname = "CP_WITHOUT_EAW_HWF",
	- .file = FILE_LINE,
	- .ucdname = "CP",
	+ .file = FILE_LINE,
	+ .ucdname = "CP",
	},
	{
	.enumname = "CP_WITH_EAW_HWF",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "CR",
	- .file = FILE_LINE,
	- .ucdname = "CR",
	+ .file = FILE_LINE,
	+ .ucdname = "CR",
	},
	{
	.enumname = "EB",
	- .file = FILE_LINE,
	- .ucdname = "EB",
	+ .file = FILE_LINE,
	+ .ucdname = "EB",
	},
	{
	.enumname = "EM",
	- .file = FILE_LINE,
	- .ucdname = "EM",
	+ .file = FILE_LINE,
	+ .ucdname = "EM",
	},
	{
	.enumname = "EX",
	- .file = FILE_LINE,
	- .ucdname = "EX",
	+ .file = FILE_LINE,
	+ .ucdname = "EX",
	},
	{
	.enumname = "GL",
	- .file = FILE_LINE,
	- .ucdname = "GL",
	+ .file = FILE_LINE,
	+ .ucdname = "GL",
	},
	{
	.enumname = "H2",
	- .file = FILE_LINE,
	- .ucdname = "H2",
	+ .file = FILE_LINE,
	+ .ucdname = "H2",
	},
	{
	.enumname = "H3",
	- .file = FILE_LINE,
	- .ucdname = "H3",
	+ .file = FILE_LINE,
	+ .ucdname = "H3",
	},
	{
	.enumname = "HL",
	- .file = FILE_LINE,
	- .ucdname = "HL",
	+ .file = FILE_LINE,
	+ .ucdname = "HL",
	},
	{
	.enumname = "HY",
	- .file = FILE_LINE,
	- .ucdname = "HY",
	+ .file = FILE_LINE,
	+ .ucdname = "HY",
	},
	{
	.enumname = "ID",
	- .file = FILE_LINE,
	- .ucdname = "ID",
	+ .file = FILE_LINE,
	+ .ucdname = "ID",
	},
	{
	.enumname = "IN",
	- .file = FILE_LINE,
	- .ucdname = "IN",
	+ .file = FILE_LINE,
	+ .ucdname = "IN",
	},
	{
	.enumname = "IS",
	- .file = FILE_LINE,
	- .ucdname = "IS",
	+ .file = FILE_LINE,
	+ .ucdname = "IS",
	},
	{
	.enumname = "JL",
	- .file = FILE_LINE,
	- .ucdname = "JL",
	+ .file = FILE_LINE,
	+ .ucdname = "JL",
	},
	{
	.enumname = "JT",
	- .file = FILE_LINE,
	- .ucdname = "JT",
	+ .file = FILE_LINE,
	+ .ucdname = "JT",
	},
	{
	.enumname = "JV",
	- .file = FILE_LINE,
	- .ucdname = "JV",
	+ .file = FILE_LINE,
	+ .ucdname = "JV",
	},
	{
	.enumname = "LF",
	- .file = FILE_LINE,
	- .ucdname = "LF",
	+ .file = FILE_LINE,
	+ .ucdname = "LF",
	},
	{
	.enumname = "NL",
	- .file = FILE_LINE,
	- .ucdname = "NL",
	+ .file = FILE_LINE,
	+ .ucdname = "NL",
	},
	{
	.enumname = "NS",
	- .file = FILE_LINE,
	- .ucdname = "NS",
	+ .file = FILE_LINE,
	+ .ucdname = "NS",
	},
	{
	.enumname = "NU",
	- .file = FILE_LINE,
	- .ucdname = "NU",
	+ .file = FILE_LINE,
	+ .ucdname = "NU",
	},
	{
	.enumname = "OP_WITHOUT_EAW_HWF",
	- .file = FILE_LINE,
	- .ucdname = "OP",
	+ .file = FILE_LINE,
	+ .ucdname = "OP",
	},
	{
	.enumname = "OP_WITH_EAW_HWF",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "PO",
	- .file = FILE_LINE,
	- .ucdname = "PO",
	+ .file = FILE_LINE,
	+ .ucdname = "PO",
	},
	{
	.enumname = "PR",
	- .file = FILE_LINE,
	- .ucdname = "PR",
	+ .file = FILE_LINE,
	+ .ucdname = "PR",
	},
	{
	.enumname = "QU",
	- .file = FILE_LINE,
	- .ucdname = "QU",
	+ .file = FILE_LINE,
	+ .ucdname = "QU",
	},
	{
	.enumname = "RI",
	- .file = FILE_LINE,
	- .ucdname = "RI",
	+ .file = FILE_LINE,
	+ .ucdname = "RI",
	},
	{
	.enumname = "SP",
	- .file = FILE_LINE,
	- .ucdname = "SP",
	+ .file = FILE_LINE,
	+ .ucdname = "SP",
	},
	{
	.enumname = "SY",
	- .file = FILE_LINE,
	- .ucdname = "SY",
	+ .file = FILE_LINE,
	+ .ucdname = "SY",
	},
	{
	.enumname = "WJ",
	- .file = FILE_LINE,
	- .ucdname = "WJ",
	+ .file = FILE_LINE,
	+ .ucdname = "WJ",
	},
	{
	.enumname = "ZW",
	- .file = FILE_LINE,
	- .ucdname = "ZW",
	+ .file = FILE_LINE,
	+ .ucdname = "ZW",
	},
	{
	.enumname = "ZWJ",
	- .file = FILE_LINE,
	- .ucdname = "ZWJ",
	+ .file = FILE_LINE,
	+ .ucdname = "ZWJ",
	},
	{
	.enumname = "TMP_AI",
	- .file = FILE_LINE,
	- .ucdname = "AI",
	+ .file = FILE_LINE,
	+ .ucdname = "AI",
	},
	{
	.enumname = "TMP_CJ",
	- .file = FILE_LINE,
	- .ucdname = "CJ",
	+ .file = FILE_LINE,
	+ .ucdname = "CJ",
	},
	{
	.enumname = "TMP_XX",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "TMP_MN",
	- .file = FILE_LINE,
	- .ucdname = "Mn",
	+ .file = FILE_LINE,
	+ .ucdname = "Mn",
	},
	{
	.enumname = "TMP_MC",
	- .file = FILE_LINE,
	- .ucdname = "Mc",
	+ .file = FILE_LINE,
	+ .ucdname = "Mc",
	},
	{
	.enumname = "TMP_SA_WITHOUT_MN_OR_MC",
	- .file = FILE_LINE,
	- .ucdname = "SA",
	+ .file = FILE_LINE,
	+ .ucdname = "SA",
	},
	{
	.enumname = "TMP_SA_WITH_MN_OR_MC",
	- .file = FILE_LINE,
	- .ucdname = "SA",
	+ .file = FILE_LINE,
	+ .ucdname = "SA",
	},
	{
	.enumname = "TMP_SG",
	- .file = FILE_LINE,
	- .ucdname = "SG",
	+ .file = FILE_LINE,
	+ .ucdname = "SG",
	},
	{
	.enumname = "TMP_EAW_H",
	- .file = FILE_EAW,
	- .ucdname = "H",
	+ .file = FILE_EAW,
	+ .ucdname = "H",
	},
	{
	.enumname = "TMP_EAW_W",
	- .file = FILE_EAW,
	- .ucdname = "W",
	+ .file = FILE_EAW,
	+ .ucdname = "W",
	},
	{
	.enumname = "TMP_EAW_F",
	- .file = FILE_EAW,
	- .ucdname = "F",
	+ .file = FILE_EAW,
	+ .ucdname = "F",
	},
	};

	@@ -306,23 +306,30 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u…

	(void)cp;

	- if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
	- !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
	+ if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
	+ !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
	!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
	(!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
	!strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
	!strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
	- if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_E…
	- !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_E…
	+ if (!strcmp(line_break_property[prop1].enumname,
	+ "CP_WITHOUT_EAW_HWF") ||
	+ !strcmp(line_break_property[prop2].enumname,
	+ "CP_WITHOUT_EAW_HWF")) {
	target = "CP_WITH_EAW_HWF";
	- } else if (!strcmp(line_break_property[prop1].enumname, "OP_WI…
	- !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_E…
	+ } else if (!strcmp(line_break_property[prop1].enumname,
	+ "OP_WITHOUT_EAW_HWF") ||
	+ !strcmp(line_break_property[prop2].enumname,
	+ "OP_WITHOUT_EAW_HWF")) {
	target = "OP_WITH_EAW_HWF";
	} else {
	/* ignore EAW for the rest */
	- if ((!strcmp(line_break_property[prop1].enumname, "TMP…
	- !strcmp(line_break_property[prop1].enumname, "TMP…
	- !strcmp(line_break_property[prop1].enumname, "TMP…
	+ if ((!strcmp(line_break_property[prop1].enumname,
	+ "TMP_EAW_H") ||
	+ !strcmp(line_break_property[prop1].enumname,
	+ "TMP_EAW_W") ||
	+ !strcmp(line_break_property[prop1].enumname,
	+ "TMP_EAW_F"))) {
	result = prop2;
	} else {
	result = prop1;
	@@ -330,15 +337,19 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u…
	}
	} else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
	!strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
	- (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
	- !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
	- if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_M…
	- !strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_M…
	+ (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
	+ !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
	+ if (!strcmp(line_break_property[prop1].enumname,
	+ "SA_WITHOUT_MN_OR_MC") ||
	+ !strcmp(line_break_property[prop2].enumname,
	+ "SA_WITHOUT_MN_OR_MC")) {
	target = "SA_WITH_MN_OR_MC";
	} else {
	/* ignore Mn and Mc for the rest */
	- if ((!strcmp(line_break_property[prop1].enumname, "TMP…
	- !strcmp(line_break_property[prop1].enumname, "TMP…
	+ if ((!strcmp(line_break_property[prop1].enumname,
	+ "TMP_MN") ||
	+ !strcmp(line_break_property[prop1].enumname,
	+ "TMP_MC"))) {
	result = prop2;
	} else {
	result = prop1;
	@@ -346,33 +357,42 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, u…
	}
	} else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
	!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
	- if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED…
	- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED…
	+ if (!strcmp(line_break_property[prop1].enumname,
	+ "TMP_EXTENDED_PICTOGRAPHIC") ||
	+ !strcmp(line_break_property[prop2].enumname,
	+ "TMP_EXTENDED_PICTOGRAPHIC")) {
	target = "BOTH_CN_EXTPICT";
	} else {
	/* ignore Cn for all the other properties */
	- if (!strcmp(line_break_property[prop1].enumname, "TMP_…
	+ if (!strcmp(line_break_property[prop1].enumname,
	+ "TMP_CN")) {
	result = prop2;
	} else {
	result = prop1;
	}
	}
	- } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_…
	- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_…
	+ } else if (!strcmp(line_break_property[prop1].enumname,
	+ "TMP_EXTENDED_PICTOGRAPHIC") ||
	+ !strcmp(line_break_property[prop2].enumname,
	+ "TMP_EXTENDED_PICTOGRAPHIC")) {
	if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
	!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
	target = "BOTH_CN_EXTPICT";
	} else {
	- /* ignore Extended_Pictographic for all the other prop…
	- if (!strcmp(line_break_property[prop1].enumname, "TMP_…
	+ /* ignore Extended_Pictographic for all the other
	+ * properties */
	+ if (!strcmp(line_break_property[prop1].enumname,
	+ "TMP_EXTENDED_PICTOGRAPHIC")) {
	result = prop2;
	} else {
	result = prop1;
	}
	}
	} else {
	- fprintf(stderr, "handle_conflict: Cannot handle conflict %s <-…
	- line_break_property[prop1].enumname, line_break_proper…
	+ fprintf(stderr,
	+ "handle_conflict: Cannot handle conflict %s <- %s.\n",
	+ line_break_property[prop1].enumname,
	+ line_break_property[prop2].enumname);
	exit(1);
	}

	@@ -402,27 +422,44 @@ post_process(struct properties *prop)
	/* post-mapping according to the line breaking algorithm */
	for (i = 0; i < UINT32_C(0x110000); i++) {
	/* LB1 */
	- if (!strcmp(line_break_property[prop[i].property].enumname, "T…
	- !strcmp(line_break_property[prop[i].property].enumname, "T…
	- !strcmp(line_break_property[prop[i].property].enumname, "T…
	+ if (!strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_AI") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_SG") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_XX")) {
	/* map AI, SG and XX to AL */
	target = "AL";
	- } else if (!strcmp(line_break_property[prop[i].property].enumn…
	+ } else if (!strcmp(line_break_property[prop[i].property]
	+ .enumname,
	+ "TMP_SA_WITH_MN_OR_MC")) {
	/* map SA (with General_Category Mn or Mc) to CM */
	target = "CM";
	- } else if (!strcmp(line_break_property[prop[i].property].enumn…
	+ } else if (!strcmp(line_break_property[prop[i].property]
	+ .enumname,
	+ "TMP_SA_WITHOUT_MN_OR_MC")) {
	/* map SA (without General_Category Mn or Mc) to AL */
	target = "AL";
	- } else if (!strcmp(line_break_property[prop[i].property].enumn…
	+ } else if (!strcmp(line_break_property[prop[i].property]
	+ .enumname,
	+ "TMP_CJ")) {
	/* map CJ to NS */
	target = "NS";
	- } else if (!strcmp(line_break_property[prop[i].property].enumn…
	- !strcmp(line_break_property[prop[i].property].enumn…
	- !strcmp(line_break_property[prop[i].property].enumn…
	- !strcmp(line_break_property[prop[i].property].enumn…
	- !strcmp(line_break_property[prop[i].property].enumn…
	- !strcmp(line_break_property[prop[i].property].enumn…
	- !strcmp(line_break_property[prop[i].property].enumn…
	+ } else if (
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_CN") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_EXTENDED_PICTOGRAPHIC") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_MN") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_MC") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_EAW_H") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_EAW_W") ||
	+ !strcmp(line_break_property[prop[i].property].enumname,
	+ "TMP_EAW_F")) {
	/* map all the temporary classes "residue" to AL */
	target = "AL";
	} else {
	@@ -430,14 +467,17 @@ post_process(struct properties *prop)
	}

	if (target) {
	- for (result = 0; result < LEN(line_break_property); re…
	- if (!strcmp(line_break_property[result].enumna…
	+ for (result = 0; result < LEN(line_break_property);
	+ result++) {
	+ if (!strcmp(line_break_property[result]
	+ .enumname,
	target)) {
	break;
	}
	}
	if (result == LEN(line_break_property)) {
	- fprintf(stderr, "handle_conflict: Internal err…
	+ fprintf(stderr,
	+ "handle_conflict: Internal error.\n");
	exit(1);
	}

	@@ -451,10 +491,9 @@ main(int argc, char *argv[])
	{
	(void)argc;

	- properties_generate_break_property(line_break_property,
	- LEN(line_break_property), NULL,
	- handle_conflict, post_process,
	- "line_break", argv[0]);
	+ properties_generate_break_property(
	+ line_break_property, LEN(line_break_property), NULL,
	+ handle_conflict, post_process, "line_break", argv[0]);

	return 0;
	}
	diff --git a/gen/sentence.c b/gen/sentence.c
	@@ -6,78 +6,78 @@
	static const struct property_spec sentence_break_property[] = {
	{
	.enumname = "OTHER",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "CR",
	- .file = FILE_SENTENCE,
	- .ucdname = "CR",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "CR",
	},
	{
	.enumname = "LF",
	- .file = FILE_SENTENCE,
	- .ucdname = "LF",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "LF",
	},
	{
	.enumname = "EXTEND",
	- .file = FILE_SENTENCE,
	- .ucdname = "Extend",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Extend",
	},
	{
	.enumname = "SEP",
	- .file = FILE_SENTENCE,
	- .ucdname = "Sep",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Sep",
	},
	{
	.enumname = "FORMAT",
	- .file = FILE_SENTENCE,
	- .ucdname = "Format",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Format",
	},
	{
	.enumname = "SP",
	- .file = FILE_SENTENCE,
	- .ucdname = "Sp",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Sp",
	},
	{
	.enumname = "LOWER",
	- .file = FILE_SENTENCE,
	- .ucdname = "Lower",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Lower",
	},
	{
	.enumname = "UPPER",
	- .file = FILE_SENTENCE,
	- .ucdname = "Upper",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Upper",
	},
	{
	.enumname = "OLETTER",
	- .file = FILE_SENTENCE,
	- .ucdname = "OLetter",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "OLetter",
	},
	{
	.enumname = "NUMERIC",
	- .file = FILE_SENTENCE,
	- .ucdname = "Numeric",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Numeric",
	},
	{
	.enumname = "ATERM",
	- .file = FILE_SENTENCE,
	- .ucdname = "ATerm",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "ATerm",
	},
	{
	.enumname = "SCONTINUE",
	- .file = FILE_SENTENCE,
	- .ucdname = "SContinue",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "SContinue",
	},
	{
	.enumname = "STERM",
	- .file = FILE_SENTENCE,
	- .ucdname = "STerm",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "STerm",
	},
	{
	.enumname = "CLOSE",
	- .file = FILE_SENTENCE,
	- .ucdname = "Close",
	+ .file = FILE_SENTENCE,
	+ .ucdname = "Close",
	},
	};

	@@ -86,9 +86,9 @@ main(int argc, char *argv[])
	{
	(void)argc;

	- properties_generate_break_property(sentence_break_property,
	- LEN(sentence_break_property), NULL,
	- NULL, NULL, "sentence_break", argv[…
	+ properties_generate_break_property(
	+ sentence_break_property, LEN(sentence_break_property), NULL,
	+ NULL, NULL, "sentence_break", argv[0]);

	return 0;
	}
	diff --git a/gen/util.c b/gen/util.c
	@@ -1,13 +1,12 @@
	/* See LICENSE file for copyright and license details. */
	-#include <stdbool.h>
	#include <ctype.h>
	#include <errno.h>
	#include <inttypes.h>
	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	-#include <stdlib.h>
	#include <stdio.h>
	+#include <stdlib.h>
	#include <string.h>

	#include "util.h"
	@@ -21,12 +20,13 @@ struct properties_payload {
	struct properties *prop;
	const struct property_spec *spec;
	uint_least8_t speclen;
	- int (*set_value)(struct properties_payload *, uint_least32_t, int_leas…
	- uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, uint_l…
	+ int (*set_value)(struct properties_payload *, uint_least32_t,
	+ int_least64_t);
	+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	+ uint_least8_t);
	};

	-struct break_test_payload
	-{
	+struct break_test_payload {
	struct break_test **test;
	size_t *testlen;
	};
	@@ -51,8 +51,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)

	/* the maximum valid codepoint is 0x10FFFF */
	if (len > 6) {
	- fprintf(stderr, "hextocp: '%.*s' is too long.\n",
	- (int)len, str);
	+ fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len,
	+ str);
	return 1;
	}

	@@ -77,8 +77,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
	}

	if (*cp > UINT32_C(0x10FFFF)) {
	- fprintf(stderr, "hextocp: '%.*s' is too large.\n",
	- (int)len, str);
	+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len,
	+ str);
	return 1;
	}

	@@ -98,8 +98,10 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t …
	}

	/* count the number of spaces in the string and infer list length */
	- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count+…
	+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
	+ count++, tmp1 = tmp2 + 1) {
	;
	+ }

	/* allocate resources */
	if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
	@@ -110,7 +112,8 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t …
	/* go through the string again, parsing the numbers */
	for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
	tmp2 = strchr(tmp1, ' ');
	- if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),…
	+ if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
	+ &((*cp)[i]))) {
	return 1;
	}
	if (tmp2 != NULL) {
	@@ -144,8 +147,10 @@ range_parse(const char *str, struct range *range)
	}

	void
	-parse_file_with_callback(const char *fname, int (*callback)(const char *,
	- char **, size_t, char *, void *), void *payload)
	+parse_file_with_callback(const char *fname,
	+ int (*callback)(const char *, char **, size_t, char *,
	+ void *),
	+ void *payload)
	{
	FILE *fp;
	char *line = NULL, **field = NULL, *comment;
	@@ -182,10 +187,15 @@ parse_file_with_callback(const char fname, int (callbac…
	if (line[i] != '#') {
	/* extend field buffer, if necessary */
	if (++nfields > fieldbufsize) {
	- if ((field = realloc(field, nfields *
	- sizeof(*field))) == NULL) {
	- fprintf(stderr, "parse_file_wi…
	- "callback: realloc: %s…
	+ if ((field = realloc(
	+ field,
	+ nfields *
	+ sizeof(*field))) …
	+ NULL) {
	+ fprintf(stderr,
	+ "parse_file_with_"
	+ "callback: realloc: "
	+ "%s.\n",
	strerror(errno));
	exit(1);
	}
	@@ -209,8 +219,9 @@ parse_file_with_callback(const char fname, int (callback)…

	/* go back whitespace and terminate field there */
	if (i > 0) {
	- for (j = i - 1; line[j] == ' '; j--)
	+ for (j = i - 1; line[j] == ' '; j--) {
	;
	+ }
	line[j + 1] = '\0';
	} else {
	line[i] = '\0';
	@@ -230,7 +241,7 @@ parse_file_with_callback(const char fname, int (callback)…
	/* call callback function */
	if (callback(fname, field, nfields, comment, payload)) {
	fprintf(stderr, "parse_file_with_callback: "
	- "Malformed input.\n");
	+ "Malformed input.\n");
	exit(1);
	}
	}
	@@ -257,10 +268,11 @@ properties_callback(const char file, char *field, size_…

	for (i = 0; i < p->speclen; i++) {
	/* identify fitting file and identifier */
	- if (p->spec[i].file &&
	- !strcmp(p->spec[i].file, file) &&
	+ if (p->spec[i].file && !strcmp(p->spec[i].file, file) &&
	(!strcmp(p->spec[i].ucdname, field[1]) ||
	- (comment != NULL && !strncmp(p->spec[i].ucdname, comment,…
	+ (comment != NULL &&
	+ !strncmp(p->spec[i].ucdname, comment,
	+ strlen(p->spec[i].ucdname)) &&
	comment[strlen(p->spec[i].ucdname)] == ' '))) {
	/* parse range in first field */
	if (range_parse(field[0], &r)) {
	@@ -287,7 +299,8 @@ properties_compress(const struct properties *prop,
	uint_least32_t cp, i;

	/* initialization */
	- if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp…
	+ if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) *
	+ sizeof(*(comp->offset))))) {
	fprintf(stderr, "malloc: %s\n", strerror(errno));
	exit(1);
	}
	@@ -296,7 +309,8 @@ properties_compress(const struct properties *prop,

	for (cp = 0; cp < UINT32_C(0x110000); cp++) {
	for (i = 0; i < comp->datalen; i++) {
	- if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*pro…
	+ if (!memcmp(&(prop[cp]), &(comp->data[i]),
	+ sizeof(*prop))) {
	/* found a match! */
	comp->offset[cp] = i;
	break;
	@@ -308,9 +322,9 @@ properties_compress(const struct properties *prop,
	* add current properties to data and add the
	* offset in the offset-table
	*/
	- if (!(comp->data = reallocate_array(comp->data,
	- ++(comp->datalen),
	- sizeof(*(comp->dat…
	+ if (!(comp->data = reallocate_array(
	+ comp->data, ++(comp->datalen),
	+ sizeof(*(comp->data))))) {
	fprintf(stderr, "reallocate_array: %s\n",
	strerror(errno));
	exit(1);
	@@ -357,8 +371,7 @@ properties_get_major_minor(const struct properties_compress…
	* and need less storage)
	*/
	for (j = 0; j + 0xFF < mm->minorlen; j++) {
	- if (!memcmp(&(comp->offset[i << 8]),
	- &(mm->minor[j]),
	+ if (!memcmp(&(comp->offset[i << 8]), &(mm->minor[j]),
	sizeof(*(comp->offset)) * 0x100)) {
	break;
	}
	@@ -373,9 +386,9 @@ properties_get_major_minor(const struct properties_compress…
	* in major
	*/
	mm->minorlen += 0x100;
	- if (!(mm->minor = reallocate_array(mm->minor,
	- mm->minorlen,
	- sizeof(*(mm->minor)…
	+ if (!(mm->minor =
	+ reallocate_array(mm->minor, mm->minorlen,
	+ sizeof(*(mm->minor)))))…
	fprintf(stderr, "reallocate_array: %s\n",
	strerror(errno));
	exit(1);
	@@ -403,7 +416,7 @@ properties_print_lookup_table(char name, size_t data, siz…
	}
	}

	- type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
	+ type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
	(maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
	(maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
	"uint_least64_t";
	@@ -418,21 +431,21 @@ properties_print_lookup_table(char name, size_t data, s…
	} else {
	printf(",\n\t");
	}
	-
	}
	printf("};\n");
	}

	void
	-properties_print_derived_lookup_table(char *name, char *type, size_t *offset, …
	- int_least64_t (*get_value)(const struct …
	- size_t), const void *payload)
	+properties_print_derived_lookup_table(
	+ char *name, char *type, size_t *offset, size_t offsetlen,
	+ int_least64_t (*get_value)(const struct properties *, size_t),
	+ const void *payload)
	{
	size_t i;

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < offsetlen; i++) {
	- printf("%"PRIiLEAST64, get_value(payload, offset[i]));
	+ printf("%" PRIiLEAST64, get_value(payload, offset[i]));
	if (i + 1 == offsetlen) {
	printf("\n");
	} else if ((i + 1) % 8 != 0) {
	@@ -440,7 +453,6 @@ properties_print_derived_lookup_table(char name, char typ…
	} else {
	printf(",\n\t");
	}
	-
	}
	printf("};\n");
	}
	@@ -464,17 +476,19 @@ set_value_bp(struct properties_payload *payload, uint_lea…
	{
	if (payload->prop[cp].property != payload->speclen) {
	if (payload->handle_conflict == NULL) {
	- fprintf(stderr, "set_value_bp: "
	- "Unhandled character break property "
	+ fprintf(stderr,
	+ "set_value_bp: "
	+ "Unhandled character break property "
	"overwrite for 0x%06X (%s <- %s).\n",
	- cp, payload->spec[payload->prop[cp].
	- property].enumname,
	+ cp,
	+ payload->spec[payload->prop[cp].property]
	+ .enumname,
	payload->spec[value].enumname);
	return 1;
	} else {
	- value = payload->handle_conflict(cp,
	- (uint_least8_t)payload->prop[cp].property,
	- (uint_least8_t)value);
	+ value = payload->handle_conflict(
	+ cp, (uint_least8_t)payload->prop[cp].property,
	+ (uint_least8_t)value);
	}
	}
	payload->prop[cp].property = value;
	@@ -489,15 +503,13 @@ get_value_bp(const struct properties *prop, size_t offset)
	}

	void
	-properties_generate_break_property(const struct property_spec *spec,
	- uint_least8_t speclen,
	- uint_least8_t (*fill_missing)(
	- uint_least32_t),
	- uint_least8_t (*handle_conflict)(
	- uint_least32_t, uint_least8_t,
	- uint_least8_t), void
	- (*post_process)(struct properties *),
	- const char *prefix, const char *argv0)
	+properties_generate_break_property(
	+ const struct property_spec *spec, uint_least8_t speclen,
	+ uint_least8_t (*fill_missing)(uint_least32_t),
	+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	+ uint_least8_t),
	+ void (*post_process)(struct properties *), const char *prefix,
	+ const char *argv0)
	{
	struct properties_compressed comp;
	struct properties_major_minor mm;
	@@ -537,8 +549,7 @@ properties_generate_break_property(const struct property_sp…
	if (i == j && spec[i].file) {
	/* file has not been processed yet */
	parse_file_with_callback(spec[i].file,
	- properties_callback,
	- &payload);
	+ properties_callback, &payload…
	}
	}

	@@ -546,7 +557,8 @@ properties_generate_break_property(const struct property_sp…
	for (i = 0; i < UINT32_C(0x110000); i++) {
	if (payload.prop[i].property == speclen) {
	if (fill_missing != NULL) {
	- payload.prop[i].property = fill_missing((uint_…
	+ payload.prop[i].property =
	+ fill_missing((uint_least32_t)i);
	} else {
	payload.prop[i].property = 0;
	}
	@@ -559,14 +571,16 @@ properties_generate_break_property(const struct property_…
	}

	/* compress data */
	- printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",…
	+ printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",
	+ argv0);
	properties_compress(prop, &comp);

	- fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,
	- prefix, properties_get_major_minor(&comp, &mm));
	+ fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, prefi…
	+ properties_get_major_minor(&comp, &mm));

	/* prepare names */
	- if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(bu…
	+ if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >=
	+ LEN(buf1)) {
	fprintf(stderr, "snprintf: String truncated.\n");
	exit(1);
	}
	@@ -578,9 +592,12 @@ properties_generate_break_property(const struct property_s…
	prefix_uc[i] = (char)toupper(prefix[i]);
	}
	prefix_uc[prefixlen] = '\0';
	- if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf…
	- (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3)…
	- (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)…
	+ if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >=
	+ LEN(buf2) \|\|
	+ (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >=
	+ LEN(buf3) \|\|
	+ (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >=
	+ LEN(buf4)) {
	fprintf(stderr, "snprintf: String truncated.\n");
	exit(1);
	}
	@@ -589,8 +606,9 @@ properties_generate_break_property(const struct property_sp…
	properties_print_enum(spec, speclen, buf1, buf2);
	properties_print_lookup_table(buf3, mm.major, 0x1100);
	printf("\n");
	- properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor,…
	- get_value_bp, comp.data);
	+ properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor,
	+ mm.minorlen, get_value_bp,
	+ comp.data);

	/* free data */
	free(prop);
	@@ -625,42 +643,50 @@ break_test_callback(const char *fname, char **field, size…
	memset(t, 0, sizeof(*t));

	/* parse testcase "<÷\|×> <cp> <÷\|×> ... <cp> <÷\|×>" */
	- for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
	- token = strtok(NULL, " ")) {
	+ for (token = strtok(field[0], " "), i = 0; token != NULL;
	+ i++, token = strtok(NULL, " ")) {
	if (i % 2 == 0) {
	/* delimiter or start of sequence */
	- if (i == 0 \|\| !strncmp(token, "\xC3\xB7", 2)) { /* UTF…
	+ if (i == 0 \|\|
	+ !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
	/*
	* '÷' indicates a breakpoint,
	* the current length is done; allocate
	* a new length field and set it to 0
	*/
	- if ((t->len = realloc(t->len,
	- ++t->lenlen * sizeof(*t->len))) == NULL) {
	- fprintf(stderr, "break_test_"
	+ if ((t->len = realloc(
	+ t->len,
	+ ++t->lenlen * sizeof(*t->len))) ==
	+ NULL) {
	+ fprintf(stderr,
	+ "break_test_"
	"callback: realloc: %s.\n",
	strerror(errno));
	return 1;
	}
	t->len[t->lenlen - 1] = 0;
	} else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 …
	- /*
	- * '×' indicates a non-breakpoint, do nothing
	- */
	+ /* '×' indicates a non-breakpoint, do nothing…
	} else {
	- fprintf(stderr, "break_test_callback: "
	- "Malformed delimiter '%s'.\n", token);
	+ fprintf(stderr,
	+ "break_test_callback: "
	+ "Malformed delimiter '%s'.\n",
	+ token);
	return 1;
	}
	} else {
	/* add codepoint to cp-array */
	- if ((t->cp = realloc(t->cp, ++t->cplen *
	- sizeof(*t->cp))) == NULL) {
	- fprintf(stderr, "break_test_callback: "
	- "realloc: %s.\n", strerror(errno));
	+ if ((t->cp = realloc(t->cp,
	+ ++t->cplen * sizeof(*t->cp))) ==
	+ NULL) {
	+ fprintf(stderr,
	+ "break_test_callback: "
	+ "realloc: %s.\n",
	+ strerror(errno));
	return 1;
	}
	- if (hextocp(token, strlen(token), &t->cp[t->cplen - 1]…
	+ if (hextocp(token, strlen(token),
	+ &t->cp[t->cplen - 1])) {
	return 1;
	}
	if (t->lenlen > 0) {
	@@ -688,8 +714,7 @@ break_test_callback(const char *fname, char **field, size_t…
	}

	void
	-break_test_list_parse(char *fname, struct break_test **test,
	- size_t *testlen)
	+break_test_list_parse(char *fname, struct break_test **test, size_t *testlen)
	{
	struct break_test_payload pl = {
	.test = test,
	@@ -703,13 +728,14 @@ break_test_list_parse(char *fname, struct break_test **te…

	void
	break_test_list_print(const struct break_test *test, size_t testlen,
	- const char *identifier, const char *progname)
	+ const char *identifier, const char *progname)
	{
	size_t i, j;

	printf("/* Automatically generated by %s */\n"
	"#include <stdint.h>\n#include <stddef.h>\n\n"
	- "#include \"../gen/types.h\"\n\n", progname);
	+ "#include \"../gen/types.h\"\n\n",
	+ progname);

	printf("static const struct break_test %s[] = {\n", identifier);
	for (i = 0; i < testlen; i++) {
	diff --git a/gen/util.h b/gen/util.h
	@@ -7,7 +7,7 @@

	#include "types.h"

	-#define LEN(x) (sizeof (x) / sizeof *(x))
	+#define LEN(x) (sizeof(x) / sizeof *(x))

	struct property_spec {
	const char *enumname;
	@@ -34,30 +34,31 @@ struct properties_major_minor {
	int hextocp(const char *, size_t, uint_least32_t *cp);
	int parse_cp_list(const char *, uint_least32_t **, size_t *);

	-void parse_file_with_callback(const char *, int (*callback)(const char *,
	- char **, size_t, char *, void *), void *payload);
	+void parse_file_with_callback(const char *,
	+ int (*callback)(const char *, char **, size_t,
	+ char *, void *),
	+ void *payload);

	-void properties_compress(const struct properties *, struct properties_compress…
	+void properties_compress(const struct properties *,
	+ struct properties_compressed *comp);
	double properties_get_major_minor(const struct properties_compressed *,
	struct properties_major_minor *);
	void properties_print_lookup_table(char *, size_t *, size_t);
	-void properties_print_derived_lookup_table(char *, char *, size_t *, size_t,
	- int_least64_t (*get_value)(const struct …
	- size_t), const void *);
	-
	-void properties_generate_break_property(const struct property_spec *,
	- uint_least8_t, uint_least8_t
	- (*fill_missing)(uint_least32_t),
	- uint_least8_t
	- (*handle_conflict)(uint_least32_t,
	- uint_least8_t, uint_least8_t),
	- void (*post_process)
	- (struct properties *),
	- const char *, const char *);
	+void properties_print_derived_lookup_table(
	+ char *, char *, size_t *, size_t,
	+ int_least64_t (*get_value)(const struct properties *, size_t),
	+ const void *);
	+
	+void properties_generate_break_property(
	+ const struct property_spec *, uint_least8_t,
	+ uint_least8_t (*fill_missing)(uint_least32_t),
	+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	+ uint_least8_t),
	+ void (*post_process)(struct properties *), const char *, const char *);

	void break_test_list_parse(char *, struct break_test **, size_t *);
	-void break_test_list_print(const struct break_test *, size_t,
	- const char *, const char *);
	+void break_test_list_print(const struct break_test *, size_t, const char *,
	+ const char *);
	void break_test_list_free(struct break_test *, size_t);

	#endif /* UTIL_H */
	diff --git a/gen/word.c b/gen/word.c
	@@ -11,108 +11,108 @@
	static const struct property_spec word_break_property[] = {
	{
	.enumname = "OTHER",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "ALETTER",
	- .file = FILE_WORD,
	- .ucdname = "ALetter",
	+ .file = FILE_WORD,
	+ .ucdname = "ALetter",
	},
	{
	.enumname = "BOTH_ALETTER_EXTPICT",
	- .file = NULL,
	- .ucdname = NULL,
	+ .file = NULL,
	+ .ucdname = NULL,
	},
	{
	.enumname = "CR",
	- .file = FILE_WORD,
	- .ucdname = "CR",
	+ .file = FILE_WORD,
	+ .ucdname = "CR",
	},
	{
	.enumname = "DOUBLE_QUOTE",
	- .file = FILE_WORD,
	- .ucdname = "Double_Quote",
	+ .file = FILE_WORD,
	+ .ucdname = "Double_Quote",
	},
	{
	.enumname = "EXTEND",
	- .file = FILE_WORD,
	- .ucdname = "Extend",
	+ .file = FILE_WORD,
	+ .ucdname = "Extend",
	},
	{
	.enumname = "EXTENDED_PICTOGRAPHIC",
	- .file = FILE_EMOJI,
	- .ucdname = "Extended_Pictographic",
	+ .file = FILE_EMOJI,
	+ .ucdname = "Extended_Pictographic",
	},
	{
	.enumname = "EXTENDNUMLET",
	- .file = FILE_WORD,
	- .ucdname = "ExtendNumLet",
	+ .file = FILE_WORD,
	+ .ucdname = "ExtendNumLet",
	},
	{
	.enumname = "FORMAT",
	- .file = FILE_WORD,
	- .ucdname = "Format",
	+ .file = FILE_WORD,
	+ .ucdname = "Format",
	},
	{
	.enumname = "HEBREW_LETTER",
	- .file = FILE_WORD,
	- .ucdname = "Hebrew_Letter",
	+ .file = FILE_WORD,
	+ .ucdname = "Hebrew_Letter",
	},
	{
	.enumname = "KATAKANA",
	- .file = FILE_WORD,
	- .ucdname = "Katakana",
	+ .file = FILE_WORD,
	+ .ucdname = "Katakana",
	},
	{
	.enumname = "LF",
	- .file = FILE_WORD,
	- .ucdname = "LF",
	+ .file = FILE_WORD,
	+ .ucdname = "LF",
	},
	{
	.enumname = "MIDLETTER",
	- .file = FILE_WORD,
	- .ucdname = "MidLetter",
	+ .file = FILE_WORD,
	+ .ucdname = "MidLetter",
	},
	{
	.enumname = "MIDNUM",
	- .file = FILE_WORD,
	- .ucdname = "MidNum",
	+ .file = FILE_WORD,
	+ .ucdname = "MidNum",
	},
	{
	.enumname = "MIDNUMLET",
	- .file = FILE_WORD,
	- .ucdname = "MidNumLet",
	+ .file = FILE_WORD,
	+ .ucdname = "MidNumLet",
	},
	{
	.enumname = "NEWLINE",
	- .file = FILE_WORD,
	- .ucdname = "Newline",
	+ .file = FILE_WORD,
	+ .ucdname = "Newline",
	},
	{
	.enumname = "NUMERIC",
	- .file = FILE_WORD,
	- .ucdname = "Numeric",
	+ .file = FILE_WORD,
	+ .ucdname = "Numeric",
	},
	{
	.enumname = "REGIONAL_INDICATOR",
	- .file = FILE_WORD,
	- .ucdname = "Regional_Indicator",
	+ .file = FILE_WORD,
	+ .ucdname = "Regional_Indicator",
	},
	{
	.enumname = "SINGLE_QUOTE",
	- .file = FILE_WORD,
	- .ucdname = "Single_Quote",
	+ .file = FILE_WORD,
	+ .ucdname = "Single_Quote",
	},
	{
	.enumname = "WSEGSPACE",
	- .file = FILE_WORD,
	- .ucdname = "WSegSpace",
	+ .file = FILE_WORD,
	+ .ucdname = "WSegSpace",
	},
	{
	.enumname = "ZWJ",
	- .file = FILE_WORD,
	- .ucdname = "ZWJ",
	+ .file = FILE_WORD,
	+ .ucdname = "ZWJ",
	},
	};

	@@ -124,8 +124,10 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, ui…
	(void)cp;

	if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
	- !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPH…
	- (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPH…
	+ !strcmp(word_break_property[prop2].enumname,
	+ "EXTENDED_PICTOGRAPHIC")) \|\|
	+ (!strcmp(word_break_property[prop1].enumname,
	+ "EXTENDED_PICTOGRAPHIC") &&
	!strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
	for (result = 0; result < LEN(word_break_property); result++) {
	if (!strcmp(word_break_property[result].enumname,
	@@ -150,10 +152,9 @@ main(int argc, char *argv[])
	{
	(void)argc;

	- properties_generate_break_property(word_break_property,
	- LEN(word_break_property), NULL,
	- handle_conflict, NULL, "word_break",
	- argv[0]);
	+ properties_generate_break_property(
	+ word_break_property, LEN(word_break_property), NULL,
	+ handle_conflict, NULL, "word_break", argv[0]);

	return 0;
	}
	diff --git a/grapheme.h b/grapheme.h
	@@ -18,14 +18,15 @@ enum grapheme_bidirectional_override {
	size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *);
	size_t grapheme_encode_utf8(uint_least32_t, char *, size_t);

	-size_t grapheme_get_bidirectional_embedding_levels(const uint_least32_t *, siz…
	- enum grapheme_bidirectional…
	- int_least32_t *, size_t);
	-size_t grapheme_get_bidirectional_embedding_levels_utf8(const char *, size_t,
	- enum grapheme_bidirect…
	- int_least32_t *, size_…
	+size_t grapheme_get_bidirectional_embedding_levels(
	+ const uint_least32_t *, size_t, enum grapheme_bidirectional_override,
	+ int_least32_t *, size_t);
	+size_t grapheme_get_bidirectional_embedding_levels_utf8(
	+ const char *, size_t, enum grapheme_bidirectional_override,
	+ int_least32_t *, size_t);

	-bool grapheme_is_character_break(uint_least32_t, uint_least32_t, uint_least16_…
	+bool grapheme_is_character_break(uint_least32_t, uint_least32_t,
	+ uint_least16_t *);

	bool grapheme_is_lowercase(const uint_least32_t *, size_t, size_t *);
	bool grapheme_is_titlecase(const uint_least32_t *, size_t, size_t *);
	@@ -45,9 +46,12 @@ size_t grapheme_next_line_break_utf8(const char *, size_t);
	size_t grapheme_next_sentence_break_utf8(const char *, size_t);
	size_t grapheme_next_word_break_utf8(const char *, size_t);

	-size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *,…
	-size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *,…
	-size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *,…
	+size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *,
	+ size_t);
	+size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *,
	+ size_t);
	+size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *,
	+ size_t);

	size_t grapheme_to_lowercase_utf8(const char *, size_t, char *, size_t);
	size_t grapheme_to_titlecase_utf8(const char *, size_t, char *, size_t);
	diff --git a/src/bidirectional.c b/src/bidirectional.c
	@@ -12,15 +12,18 @@ struct isolate_runner {
	int_least32_t *buf;
	size_t buflen;
	enum bidi_property prev_prop;
	+
	struct {
	size_t off;
	enum bidi_property prop;
	int_least8_t level;
	} cur;
	+
	struct {
	size_t off;
	enum bidi_property prop;
	} next;
	+
	uint_least8_t paragraph_level;
	int_least8_t isolating_run_level;
	enum bidi_property last_strong_type;
	@@ -57,24 +60,42 @@ struct state {
	static inline void
	state_serialize(const struct state *s, int_least32_t *out)
	{
	- *out = (int_least32_t)(
	- ((((uint_least32_t)(s->paragraph_level)) & 0x01 /* 00000…
	- ((((uint_least32_t)(s->level + 1)) & 0x7F /* 01111…
	- ((((uint_least32_t)(s->prop)) & 0x1F /* 00011…
	- ((((uint_least32_t)(s->bracket - bidi_bracket)) & 0xFF /* 11111…
	- ((((uint_least32_t)(s->visited)) & 0x01 /* 00000…
	- ((((uint_least32_t)(s->rawprop)) & 0x1F /* 00011…
	+ *out = (int_least32_t)(((((uint_least32_t)(s->paragraph_level)) &
	+ 0x01 /* 00000001 */)
	+ << 0) \|
	+ ((((uint_least32_t)(s->level + 1)) &
	+ 0x7F /* 01111111 */)
	+ << 1) \|
	+ ((((uint_least32_t)(s->prop)) &
	+ 0x1F /* 00011111 */)
	+ << 8) \|
	+ ((((uint_least32_t)(s->bracket - bidi_bracket))…
	+ 0xFF /* 11111111 */)
	+ << 13) \|
	+ ((((uint_least32_t)(s->visited)) &
	+ 0x01 /* 00000001 */)
	+ << 21) \|
	+ ((((uint_least32_t)(s->rawprop)) &
	+ 0x1F /* 00011111 */)
	+ << 22));
	}

	static inline void
	state_deserialize(int_least32_t in, struct state *s)
	{
	- s->paragraph_level = (uint_least8_t)((((uint_least32_t)…
	- s->level = (int_least8_t)((((uint_least32_t)…
	- s->prop = (enum bidi_property)((((uint_least32_t)…
	- s->bracket = bidi_bracket + (uint_least8_t)((((uint_least32_t)…
	- s->visited = (bool)((((uint_least32_t)…
	- s->rawprop = (enum bidi_property)((((uint_least32_t)…
	+ s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) &
	+ 0x01 /* 00000001 */);
	+ s->level = (int_least8_t)((((uint_least32_t)in) >> 1) &
	+ 0x7F /* 01111111 */) -
	+ 1;
	+ s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) &
	+ 0x1F /* 00011111 */);
	+ s->bracket =
	+ bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) &
	+ 0xFF /* 11111111 */);
	+ s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */…
	+ s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) &
	+ 0x1F /* 00011111 */);
	}

	static void
	@@ -171,7 +192,6 @@ isolate_runner_advance(struct isolate_runner *ir)
	return 1;
	}

	-
	/* shift in */
	ir->prev_prop = ir->cur.prop;
	ir->cur.off = ir->next.off;
	@@ -188,13 +208,13 @@ isolate_runner_advance(struct isolate_runner *ir)
	* on the first advancement as the prev_prop holds the sos type,
	* which can only be either R or L, which are both strong types
	*/
	- if (ir->prev_prop == BIDI_PROP_R \|\|
	- ir->prev_prop == BIDI_PROP_L \|\|
	+ if (ir->prev_prop == BIDI_PROP_R \|\| ir->prev_prop == BIDI_PROP_L \|\|
	ir->prev_prop == BIDI_PROP_AL) {
	ir->last_strong_type = ir->prev_prop;
	}

	- /* initialize next state by going to the next character in the sequenc…
	+ /* initialize next state by going to the next character in the sequence
	+ */
	ir->next.off = SIZE_MAX;
	ir->next.prop = NUM_BIDI_PROPS;

	@@ -210,8 +230,7 @@ isolate_runner_advance(struct isolate_runner *ir)
	}

	/* follow BD8/BD9 and P2 to traverse the current sequence */
	- if (s.prop == BIDI_PROP_LRI \|\|
	- s.prop == BIDI_PROP_RLI \|\|
	+ if (s.prop == BIDI_PROP_LRI \|\| s.prop == BIDI_PROP_RLI \|\|
	s.prop == BIDI_PROP_FSI) {
	/*
	* we encountered an isolate initiator, increment
	@@ -224,8 +243,7 @@ isolate_runner_advance(struct isolate_runner *ir)
	if (isolate_level != 1) {
	continue;
	}
	- } else if (s.prop == BIDI_PROP_PDI &&
	- isolate_level > 0) {
	+ } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) {
	isolate_level--;

	/*
	@@ -250,12 +268,14 @@ isolate_runner_advance(struct isolate_runner *ir)
	/* we were in the first initializing round */
	continue;
	} else if (s.level == ir->isolating_run_level) {
	- /* isolate_level-skips have been handled before, we're…
	+ /* isolate_level-skips have been handled before, we're
	+ * good */
	/* still in the sequence */
	ir->next.off = (size_t)i;
	ir->next.prop = s.prop;
	} else {
	- /* out of sequence or isolated, compare levels via eos…
	+ /* out of sequence or isolated, compare levels via eos
	+ */
	if (MAX(last_isolate_level, s.level) % 2 == 0) {
	ir->next.prop = BIDI_PROP_L;
	} else {
	@@ -286,7 +306,8 @@ isolate_runner_advance(struct isolate_runner *ir)
	}

	static void
	-isolate_runner_set_current_prop(struct isolate_runner *ir, enum bidi_property …
	+isolate_runner_set_current_prop(struct isolate_runner *ir,
	+ enum bidi_property prop)
	{
	struct state s;

	@@ -301,9 +322,9 @@ static inline enum bidi_property
	get_bidi_property(uint_least32_t cp)
	{
	if (likely(cp <= 0x10FFFF)) {
	- return (enum bidi_property)
	- ((bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) &
	- 0x1F /* 00011111 */);
	+ return (enum bidi_property)(
	+ (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) &
	+ 0x1F /* 00011111 */);
	} else {
	return BIDI_PROP_L;
	}
	@@ -320,8 +341,8 @@ get_bidi_bracket_off(uint_least32_t cp)
	}

	static size_t
	-process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
	- size_t off, uint_least8_t paragraph_level)
	+process_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
	+ uint_least8_t paragraph_level)
	{
	enum bidi_property sequence_prop;
	struct isolate_runner ir, tmp;
	@@ -335,7 +356,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
	ir.prev_prop == BIDI_PROP_RLI \|\|
	ir.prev_prop == BIDI_PROP_FSI \|\|
	ir.prev_prop == BIDI_PROP_PDI) {
	- isolate_runner_set_current_prop(&ir, BIDI_PROP…
	+ isolate_runner_set_current_prop(&ir,
	+ BIDI_PROP_ON);
	} else {
	isolate_runner_set_current_prop(&ir,
	ir.prev_prop);
	@@ -371,7 +393,7 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
	}

	if (ir.prev_prop == BIDI_PROP_AN &&
	- ir.cur.prop == BIDI_PROP_CS &&
	+ ir.cur.prop == BIDI_PROP_CS &&
	ir.next.prop == BIDI_PROP_AN) {
	isolate_runner_set_current_prop(&ir, BIDI_PROP_AN);
	}
	@@ -389,14 +411,19 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
	} else if (ir.cur.prop == BIDI_PROP_EN) {
	/* set the preceding sequence */
	if (runsince != SIZE_MAX) {
	- isolate_runner_init(buf, buflen, runsince, par…
	+ isolate_runner_init(buf, buflen, runsince,
	+ paragraph_level,
	+ (runsince > off), &tmp);
	while (!isolate_runner_advance(&tmp) &&
	tmp.cur.off < ir.cur.off) {
	- isolate_runner_set_current_prop(&tmp, …
	+ isolate_runner_set_current_prop(
	+ &tmp, BIDI_PROP_EN);
	}
	runsince = SIZE_MAX;
	} else {
	- isolate_runner_init(buf, buflen, ir.cur.off, p…
	+ isolate_runner_init(buf, buflen, ir.cur.off,
	+ paragraph_level,
	+ (ir.cur.off > off), &tmp);
	isolate_runner_advance(&tmp);
	}
	/* follow the succeeding sequence */
	@@ -404,7 +431,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…
	if (tmp.cur.prop != BIDI_PROP_ET) {
	break;
	}
	- isolate_runner_set_current_prop(&tmp, BIDI_PRO…
	+ isolate_runner_set_current_prop(&tmp,
	+ BIDI_PROP_EN);
	}
	} else {
	/* sequence ended */
	@@ -439,23 +467,26 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
	isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir);
	while (!isolate_runner_advance(&ir)) {
	if (sequence_end == SIZE_MAX) {
	- if (ir.cur.prop == BIDI_PROP_B \|\|
	- ir.cur.prop == BIDI_PROP_S \|\|
	- ir.cur.prop == BIDI_PROP_WS \|\|
	- ir.cur.prop == BIDI_PROP_ON \|\|
	+ if (ir.cur.prop == BIDI_PROP_B \|\|
	+ ir.cur.prop == BIDI_PROP_S \|\|
	+ ir.cur.prop == BIDI_PROP_WS \|\|
	+ ir.cur.prop == BIDI_PROP_ON \|\|
	ir.cur.prop == BIDI_PROP_FSI \|\|
	ir.cur.prop == BIDI_PROP_LRI \|\|
	ir.cur.prop == BIDI_PROP_RLI \|\|
	ir.cur.prop == BIDI_PROP_PDI) {
	- /* the current character is an NI (neutral or …
	+ /* the current character is an NI (neutral or
	+ * isolate) */

	/* scan ahead to the end of the NI-sequence */
	- isolate_runner_init(buf, buflen, ir.cur.off, p…
	+ isolate_runner_init(buf, buflen, ir.cur.off,
	+ paragraph_level,
	+ (ir.cur.off > off), &tmp);
	while (!isolate_runner_advance(&tmp)) {
	- if (tmp.next.prop != BIDI_PROP_B &&
	- tmp.next.prop != BIDI_PROP_S &&
	- tmp.next.prop != BIDI_PROP_WS &&
	- tmp.next.prop != BIDI_PROP_ON &&
	+ if (tmp.next.prop != BIDI_PROP_B &&
	+ tmp.next.prop != BIDI_PROP_S &&
	+ tmp.next.prop != BIDI_PROP_WS &&
	+ tmp.next.prop != BIDI_PROP_ON &&
	tmp.next.prop != BIDI_PROP_FSI &&
	tmp.next.prop != BIDI_PROP_LRI &&
	tmp.next.prop != BIDI_PROP_RLI &&
	@@ -465,17 +496,17 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
	}

	/*
	- * check what follows and see if the text has …
	- * same direction on both sides
	+ * check what follows and see if the text has
	+ * the same direction on both sides
	*/
	if (ir.prev_prop == BIDI_PROP_L &&
	tmp.next.prop == BIDI_PROP_L) {
	sequence_end = tmp.cur.off;
	sequence_prop = BIDI_PROP_L;
	- } else if ((ir.prev_prop == BIDI_PROP_R \|\|
	+ } else if ((ir.prev_prop == BIDI_PROP_R \|\|
	ir.prev_prop == BIDI_PROP_EN \|\|
	ir.prev_prop == BIDI_PROP_AN) &&
	- (tmp.next.prop == BIDI_PROP_R \|\|
	+ (tmp.next.prop == BIDI_PROP_R \|\|
	tmp.next.prop == BIDI_PROP_EN \|\|
	tmp.next.prop == BIDI_PROP_AN)) {
	sequence_end = tmp.cur.off;
	@@ -486,7 +517,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…

	if (sequence_end != SIZE_MAX) {
	if (ir.cur.off <= sequence_end) {
	- isolate_runner_set_current_prop(&ir, sequence_…
	+ isolate_runner_set_current_prop(&ir,
	+ sequence_prop);
	} else {
	/* end of sequence, reset */
	sequence_end = SIZE_MAX;
	@@ -498,10 +530,9 @@ process_isolating_run_sequence(int_least32_t *buf, size_t …
	/* N2 */
	isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir);
	while (!isolate_runner_advance(&ir)) {
	- if (ir.cur.prop == BIDI_PROP_B \|\|
	- ir.cur.prop == BIDI_PROP_S \|\|
	- ir.cur.prop == BIDI_PROP_WS \|\|
	- ir.cur.prop == BIDI_PROP_ON \|\|
	+ if (ir.cur.prop == BIDI_PROP_B \|\| ir.cur.prop == BIDI_PROP_S \|\|
	+ ir.cur.prop == BIDI_PROP_WS \|\|
	+ ir.cur.prop == BIDI_PROP_ON \|\|
	ir.cur.prop == BIDI_PROP_FSI \|\|
	ir.cur.prop == BIDI_PROP_LRI \|\|
	ir.cur.prop == BIDI_PROP_RLI \|\|
	@@ -509,10 +540,12 @@ process_isolating_run_sequence(int_least32_t *buf, size_t…
	/* N2 */
	if (ir.cur.level % 2 == 0) {
	/* even embedding level */
	- isolate_runner_set_current_prop(&ir, BIDI_PROP…
	+ isolate_runner_set_current_prop(&ir,
	+ BIDI_PROP_L);
	} else {
	/* odd embedding level */
	- isolate_runner_set_current_prop(&ir, BIDI_PROP…
	+ isolate_runner_set_current_prop(&ir,
	+ BIDI_PROP_R);
	}
	}
	}
	@@ -522,8 +555,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t b…

	static uint_least8_t
	get_paragraph_level(enum grapheme_bidirectional_override override,
	- bool terminate_on_pdi,
	- const int_least32_t *buf, size_t buflen)
	+ bool terminate_on_pdi, const int_least32_t *buf,
	+ size_t buflen)
	{
	struct state s;
	int_least8_t isolate_level;
	@@ -541,8 +574,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
	for (bufoff = 0, isolate_level = 0; bufoff < buflen; bufoff++) {
	state_deserialize(buf[bufoff], &s);

	- if (s.prop == BIDI_PROP_PDI &&
	- isolate_level == 0 &&
	+ if (s.prop == BIDI_PROP_PDI && isolate_level == 0 &&
	terminate_on_pdi) {
	/*
	* we are in a FSI-subsection of a paragraph and
	@@ -552,8 +584,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
	}

	/* BD8/BD9 */
	- if ((s.prop == BIDI_PROP_LRI \|\|
	- s.prop == BIDI_PROP_RLI \|\|
	+ if ((s.prop == BIDI_PROP_LRI \|\| s.prop == BIDI_PROP_RLI \|\|
	s.prop == BIDI_PROP_FSI) &&
	isolate_level < MAX_DEPTH) {
	/* we hit an isolate initiator, increment counter */
	@@ -570,8 +601,7 @@ get_paragraph_level(enum grapheme_bidirectional_override ov…
	/* P3 */
	if (s.prop == BIDI_PROP_L) {
	return 0;
	- } else if (s.prop == BIDI_PROP_AL \|\|
	- s.prop == BIDI_PROP_R) {
	+ } else if (s.prop == BIDI_PROP_AL \|\| s.prop == BIDI_PROP_R) {
	return 1;
	}
	}
	@@ -585,13 +615,15 @@ get_paragraph_embedding_levels(enum grapheme_bidirectiona…
	{
	enum bidi_property tmp_prop;
	struct state s, t;
	+
	struct {
	int_least8_t level;
	enum grapheme_bidirectional_override override;
	bool directional_isolate;
	} directional_status[MAX_DEPTH + 2], *dirstat = directional_status;
	+
	size_t overflow_isolate_count, overflow_embedding_count,
	- valid_isolate_count, bufoff, i, runsince;
	+ valid_isolate_count, bufoff, i, runsince;
	uint_least8_t paragraph_level;

	paragraph_level = get_paragraph_level(override, false, buf, buflen);
	@@ -600,7 +632,8 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_…
	dirstat->level = (int_least8_t)paragraph_level;
	dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
	dirstat->directional_isolate = false;
	- overflow_isolate_count = overflow_embedding_count = valid_isolate_coun…
	+ overflow_isolate_count = overflow_embedding_count =
	+ valid_isolate_count = 0;

	for (bufoff = 0; bufoff < buflen; bufoff++) {
	state_deserialize(buf[bufoff], &s);
	@@ -608,79 +641,105 @@ get_paragraph_embedding_levels(enum grapheme_bidirection…
	again:
	if (tmp_prop == BIDI_PROP_RLE) {
	/* X2 */
	- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= …
	+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
	+ MAX_DEPTH &&
	overflow_isolate_count == 0 &&
	overflow_embedding_count == 0) {
	/* valid RLE */
	dirstat++;
	- dirstat->level = (dirstat - 1)->level + ((dirs…
	- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
	+ dirstat->level =
	+ (dirstat - 1)->level +
	+ ((dirstat - 1)->level % 2 != 0) + 1;
	+ dirstat->override =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
	dirstat->directional_isolate = false;
	} else {
	/* overflow RLE */
	- overflow_embedding_count += (overflow_isolate_…
	+ overflow_embedding_count +=
	+ (overflow_isolate_count == 0);
	}
	} else if (tmp_prop == BIDI_PROP_LRE) {
	/* X3 */
	- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= …
	+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
	+ MAX_DEPTH &&
	overflow_isolate_count == 0 &&
	overflow_embedding_count == 0) {
	/* valid LRE */
	dirstat++;
	- dirstat->level = (dirstat - 1)->level + ((dirs…
	- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
	+ dirstat->level =
	+ (dirstat - 1)->level +
	+ ((dirstat - 1)->level % 2 == 0) + 1;
	+ dirstat->override =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
	dirstat->directional_isolate = false;
	} else {
	/* overflow LRE */
	- overflow_embedding_count += (overflow_isolate_…
	+ overflow_embedding_count +=
	+ (overflow_isolate_count == 0);
	}
	} else if (tmp_prop == BIDI_PROP_RLO) {
	/* X4 */
	- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= …
	+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
	+ MAX_DEPTH &&
	overflow_isolate_count == 0 &&
	overflow_embedding_count == 0) {
	/* valid RLO */
	dirstat++;
	- dirstat->level = (dirstat - 1)->level + ((dirs…
	- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
	+ dirstat->level =
	+ (dirstat - 1)->level +
	+ ((dirstat - 1)->level % 2 != 0) + 1;
	+ dirstat->override =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
	dirstat->directional_isolate = false;
	} else {
	/* overflow RLO */
	- overflow_embedding_count += (overflow_isolate_…
	+ overflow_embedding_count +=
	+ (overflow_isolate_count == 0);
	}
	} else if (tmp_prop == BIDI_PROP_LRO) {
	/* X5 */
	- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= …
	+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
	+ MAX_DEPTH &&
	overflow_isolate_count == 0 &&
	overflow_embedding_count == 0) {
	/* valid LRE */
	dirstat++;
	- dirstat->level = (dirstat - 1)->level + ((dirs…
	- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
	+ dirstat->level =
	+ (dirstat - 1)->level +
	+ ((dirstat - 1)->level % 2 == 0) + 1;
	+ dirstat->override =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
	dirstat->directional_isolate = false;
	} else {
	/* overflow LRO */
	- overflow_embedding_count += (overflow_isolate_…
	+ overflow_embedding_count +=
	+ (overflow_isolate_count == 0);
	}
	} else if (tmp_prop == BIDI_PROP_RLI) {
	/* X5a */
	s.level = dirstat->level;
	- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
	+ if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
	s.prop = BIDI_PROP_L;
	- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
	+ } else if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
	s.prop = BIDI_PROP_R;
	}
	state_serialize(&s, &(buf[bufoff]));

	- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= …
	+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
	+ MAX_DEPTH &&
	overflow_isolate_count == 0 &&
	overflow_embedding_count == 0) {
	/* valid RLI */
	valid_isolate_count++;

	dirstat++;
	- dirstat->level = (dirstat - 1)->level + ((dirs…
	- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
	+ dirstat->level =
	+ (dirstat - 1)->level +
	+ ((dirstat - 1)->level % 2 != 0) + 1;
	+ dirstat->override =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
	dirstat->directional_isolate = true;
	} else {
	/* overflow RLI */
	@@ -689,22 +748,28 @@ again:
	} else if (tmp_prop == BIDI_PROP_LRI) {
	/* X5b */
	s.level = dirstat->level;
	- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
	+ if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
	s.prop = BIDI_PROP_L;
	- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
	+ } else if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
	s.prop = BIDI_PROP_R;
	}
	state_serialize(&s, &(buf[bufoff]));

	- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= …
	+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
	+ MAX_DEPTH &&
	overflow_isolate_count == 0 &&
	overflow_embedding_count == 0) {
	/* valid LRI */
	valid_isolate_count++;

	dirstat++;
	- dirstat->level = (dirstat - 1)->level + ((dirs…
	- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVE…
	+ dirstat->level =
	+ (dirstat - 1)->level +
	+ ((dirstat - 1)->level % 2 == 0) + 1;
	+ dirstat->override =
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRA…
	dirstat->directional_isolate = true;
	} else {
	/* overflow LRI */
	@@ -712,23 +777,27 @@ again:
	}
	} else if (tmp_prop == BIDI_PROP_FSI) {
	/* X5c */
	- if (get_paragraph_level(GRAPHEME_BIDIRECTIONAL_OVERRID…
	- buf + (bufoff + 1), buflen - (…
	+ if (get_paragraph_level(
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL,
	+ true, buf + (bufoff + 1),
	+ buflen - (bufoff + 1)) == 1) {
	tmp_prop = BIDI_PROP_RLI;
	goto again;
	} else { /* ... == 0 */
	tmp_prop = BIDI_PROP_LRI;
	goto again;
	}
	- } else if (tmp_prop != BIDI_PROP_B &&
	- tmp_prop != BIDI_PROP_BN &&
	+ } else if (tmp_prop != BIDI_PROP_B &&
	+ tmp_prop != BIDI_PROP_BN &&
	tmp_prop != BIDI_PROP_PDF &&
	tmp_prop != BIDI_PROP_PDI) {
	/* X6 */
	s.level = dirstat->level;
	- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
	+ if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
	s.prop = BIDI_PROP_L;
	- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
	+ } else if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
	s.prop = BIDI_PROP_R;
	}
	state_serialize(&s, &(buf[bufoff]));
	@@ -773,9 +842,11 @@ again:
	}

	s.level = dirstat->level;
	- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRI…
	+ if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
	s.prop = BIDI_PROP_L;
	- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL…
	+ } else if (dirstat->override ==
	+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
	s.prop = BIDI_PROP_R;
	}
	state_serialize(&s, &(buf[bufoff]));
	@@ -796,12 +867,9 @@ again:
	}

	/* X9 */
	- if (tmp_prop == BIDI_PROP_RLE \|\|
	- tmp_prop == BIDI_PROP_LRE \|\|
	- tmp_prop == BIDI_PROP_RLO \|\|
	- tmp_prop == BIDI_PROP_LRO \|\|
	- tmp_prop == BIDI_PROP_PDF \|\|
	- tmp_prop == BIDI_PROP_BN) {
	+ if (tmp_prop == BIDI_PROP_RLE \|\| tmp_prop == BIDI_PROP_LRE \|\|
	+ tmp_prop == BIDI_PROP_RLO \|\| tmp_prop == BIDI_PROP_LRO \|\|
	+ tmp_prop == BIDI_PROP_PDF \|\| tmp_prop == BIDI_PROP_BN) {
	s.level = -1;
	state_serialize(&s, &(buf[bufoff]));
	}
	@@ -811,8 +879,8 @@ again:
	for (bufoff = 0; bufoff < buflen; bufoff++) {
	state_deserialize(buf[bufoff], &s);
	if (!s.visited && s.level != -1) {
	- bufoff += process_isolating_run_sequence(buf, buflen, …
	- paragraph_lev…
	+ bufoff += process_isolating_run_sequence(
	+ buf, buflen, bufoff, paragraph_level);
	}
	}

	@@ -823,7 +891,7 @@ again:
	for (bufoff = 0; bufoff < buflen; bufoff++) {
	state_deserialize(buf[bufoff], &s);

	- if (s.level % 2 == 0 ) {
	+ if (s.level % 2 == 0) {
	/* even level */
	if (s.prop == BIDI_PROP_R) {
	s.level += 1;
	@@ -833,8 +901,7 @@ again:
	}
	} else {
	/* odd level */
	- if (s.prop == BIDI_PROP_L \|\|
	- s.prop == BIDI_PROP_EN \|\|
	+ if (s.prop == BIDI_PROP_L \|\| s.prop == BIDI_PROP_EN \|\|
	s.prop == BIDI_PROP_AN) {
	s.level += 1;
	}
	@@ -853,10 +920,8 @@ again:
	continue;
	}

	- if (s.rawprop == BIDI_PROP_WS \|\|
	- s.rawprop == BIDI_PROP_FSI \|\|
	- s.rawprop == BIDI_PROP_LRI \|\|
	- s.rawprop == BIDI_PROP_RLI \|\|
	+ if (s.rawprop == BIDI_PROP_WS \|\| s.rawprop == BIDI_PROP_FSI \|\|
	+ s.rawprop == BIDI_PROP_LRI \|\| s.rawprop == BIDI_PROP_RLI \|\|
	s.rawprop == BIDI_PROP_PDI) {
	if (runsince == SIZE_MAX) {
	/* a new run has begun */
	@@ -878,8 +943,7 @@ again:
	runsince = SIZE_MAX;
	}

	- if (s.rawprop == BIDI_PROP_S \|\|
	- s.rawprop == BIDI_PROP_B) {
	+ if (s.rawprop == BIDI_PROP_S \|\| s.rawprop == BIDI_PROP_B) {
	s.level = (int_least8_t)paragraph_level;
	state_serialize(&s, &(buf[bufoff]));
	}
	@@ -902,7 +966,8 @@ again:
	}

	static size_t
	-get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override…
	+get_embedding_levels(HERODOTUS_READER *r,
	+ enum grapheme_bidirectional_override override,
	int_least32_t *buf, size_t buflen)
	{
	struct state s;
	@@ -911,8 +976,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bid…

	if (buf == NULL) {
	for (; herodotus_read_codepoint(r, true, &cp) ==
	- HERODOTUS_STATUS_SUCCESS;)
	+ HERODOTUS_STATUS_SUCCESS;) {
	;
	+ }

	/* see below for return value reasoning */
	return herodotus_reader_number_read(r);
	@@ -922,8 +988,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bid…
	* the first step is to determine the bidirectional properties
	* and store them in the buffer
	*/
	- for (bufoff = 0; herodotus_read_codepoint(r, true, &cp) ==
	- HERODOTUS_STATUS_SUCCESS; bufoff++) {
	+ for (bufoff = 0;
	+ herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCES…
	+ bufoff++) {
	if (bufoff < buflen) {
	/*
	* actually only do something when we have
	@@ -974,9 +1041,10 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_b…
	}

	size_t
	-grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t …
	- enum grapheme_bidirectional_overri…
	- int_least32_t *dest, size_t destle…
	+grapheme_get_bidirectional_embedding_levels(
	+ const uint_least32_t *src, size_t srclen,
	+ enum grapheme_bidirectional_override override, int_least32_t *dest,
	+ size_t destlen)
	{
	HERODOTUS_READER r;

	@@ -986,9 +1054,10 @@ grapheme_get_bidirectional_embedding_levels(const uint_le…
	}

	size_t
	-grapheme_get_bidirectional_embedding_levels_utf8(const char *src, size_t srcle…
	- enum grapheme_bidirectional_o…
	- int_least32_t *dest, size_t d…
	+grapheme_get_bidirectional_embedding_levels_utf8(
	+ const char *src, size_t srclen,
	+ enum grapheme_bidirectional_override override, int_least32_t *dest,
	+ size_t destlen)
	{
	HERODOTUS_READER r;

	diff --git a/src/case.c b/src/case.c
	@@ -2,8 +2,8 @@
	#include <stddef.h>
	#include <stdint.h>

	-#include "../grapheme.h"
	#include "../gen/case.h"
	+#include "../grapheme.h"
	#include "util.h"

	static inline enum case_property
	@@ -11,7 +11,7 @@ get_case_property(uint_least32_t cp)
	{
	if (likely(cp <= UINT32_C(0x10FFFF))) {
	return (enum case_property)
	- case_minor[case_major[cp >> 8] + (cp & 0xFF)];
	+ case_minor[case_major[cp >> 8] + (cp & 0xFF)];
	} else {
	return CASE_PROP_OTHER;
	}
	@@ -45,58 +45,64 @@ to_case(HERODOTUS_READER r, HERODOTUS_WRITER w,
	uint_least32_t cp, tmp_cp;
	int_least32_t map;

	- for (; herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCC…
	+ for (; herodotus_read_codepoint(r, true, &cp) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	if (sc == lower_special) {
	/*
	- * For the special Final_Sigma-rule (see SpecialCasing…
	- * which is the only non-localized case-dependent rule,
	- * we apply a different mapping when a sigma is at the
	- * end of a word.
	+ * For the special Final_Sigma-rule (see
	+ * SpecialCasing.txt), which is the only non-localized
	+ * case-dependent rule, we apply a different mapping
	+ * when a sigma is at the end of a word.
	*
	* Before: cased case-ignorable*
	* After: not(case-ignorable* cased)
	*
	- * We check the after-condition on demand, but the bef…
	- * condition is best checked using the "level"-heurist…
	- * also used in the sentence and line breaking-impleme…
	+ * We check the after-condition on demand, but the
	+ * before- condition is best checked using the
	+ * "level"-heuristic also used in the sentence and line
	+ * breaking-implementations.
	*/
	- if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER …
	+ if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER
	+ SIGMA */
	(final_sigma_level == 1 \|\|
	final_sigma_level == 2)) {
	/*
	* check succeeding characters by first skippi…
	- * all case-ignorable characters and then chec…
	- * if the succeeding character is cased, inval…
	- * the after-condition
	+ * all case-ignorable characters and then
	+ * checking if the succeeding character is
	+ * cased, invalidating the after-condition
	*/
	herodotus_reader_copy(r, &tmp);
	for (prop = NUM_CASE_PROPS;
	- (s = herodotus_read_codepoint(&tmp, true,…
	- HERODOTUS_STATUS_SUCCESS; ) {
	+ (s = herodotus_read_codepoint(&tmp, true,
	+ &tmp_cp)) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	prop = get_case_property(tmp_cp);

	if (prop != CASE_PROP_CASE_IGNORABLE &&
	prop != CASE_PROP_BOTH_CASED_CASE_…
	- break;
	+ break;
	}
	}

	/*
	- * Now prop is something other than case-ignor…
	- * the source-string ended.
	- * If it is something other than cased, we know
	+ * Now prop is something other than
	+ * case-ignorable or the source-string ended. …
	+ * it is something other than cased, we know
	* that the after-condition holds
	*/
	if (s != HERODOTUS_STATUS_SUCCESS \|\|
	(prop != CASE_PROP_CASED &&
	prop != CASE_PROP_BOTH_CASED_CASE_IGNORAB…
	/*
	- * write GREEK SMALL LETTER FINAL SIGM…
	- * destination
	+ * write GREEK SMALL LETTER FINAL SIGMA
	+ * to destination
	+ */
	+ herodotus_write_codepoint(
	+ w, UINT32_C(0x03C2));
	+
	+ /* reset Final_Sigma-state and continue
	*/
	- herodotus_write_codepoint(w, UINT32_C(…
	-
	- /* reset Final_Sigma-state and continu…
	final_sigma_level = 0;
	continue;
	}
	@@ -110,11 +116,13 @@ to_case(HERODOTUS_READER r, HERODOTUS_WRITER w,
	prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
	/* sequence has begun */
	final_sigma_level = 1;
	- } else if ((final_sigma_level == 1 \|\|
	- final_sigma_level == 2) &&
	- (prop == CASE_PROP_CASE_IGNORABLE \|\|
	- prop == CASE_PROP_BOTH_CASED_CASE_IGNORABL…
	- /* case-ignorable sequence begins or continued…
	+ } else if (
	+ (final_sigma_level == 1 \|\|
	+ final_sigma_level == 2) &&
	+ (prop == CASE_PROP_CASE_IGNORABLE \|\|
	+ prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE))…
	+ /* case-ignorable sequence begins or continued
	+ */
	final_sigma_level = 2;
	} else {
	/* sequence broke */
	@@ -134,8 +142,8 @@ to_case(HERODOTUS_READER r, HERODOTUS_WRITER w,
	}
	} else {
	/* we have a simple mapping */
	- herodotus_write_codepoint(w, (uint_least32_t)
	- ((int_least32_t)cp + map));
	+ herodotus_write_codepoint(
	+ w, (uint_least32_t)((int_least32_t)cp + map));
	}
	}

	@@ -168,14 +176,16 @@ to_titlecase(HERODOTUS_READER r, HERODOTUS_WRITER w)

	for (; (nwb = herodotus_next_word_break(r)) > 0;) {
	herodotus_reader_push_advance_limit(r, nwb);
	- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO…
	+ for (; (s = herodotus_read_codepoint(r, false, &cp)) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	/* check if we have a cased character */
	prop = get_case_property(cp);
	if (prop == CASE_PROP_CASED \|\|
	prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
	break;
	} else {
	- /* write the data to the output verbatim, it i…
	+ /* write the data to the output verbatim, it if
	+ * permits */
	herodotus_write_codepoint(w, cp);

	/* increment reader */
	@@ -199,9 +209,10 @@ to_titlecase(HERODOTUS_READER r, HERODOTUS_WRITER w)
	* we encountered a cased character before the word
	* break, convert it to titlecase
	*/
	- herodotus_reader_push_advance_limit(r,
	- herodotus_reader_next_codepoint_break(r));
	- to_case(r, w, 0, title_major, title_minor, title_speci…
	+ herodotus_reader_push_advance_limit(
	+ r, herodotus_reader_next_codepoint_break(r));
	+ to_case(r, w, 0, title_major, title_minor,
	+ title_special);
	herodotus_reader_pop_limit(r);
	}

	@@ -218,7 +229,8 @@ to_titlecase(HERODOTUS_READER r, HERODOTUS_WRITER w)
	}

	size_t
	-grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t…
	+grapheme_to_uppercase(const uint_least32_t *src, size_t srclen,
	+ uint_least32_t *dest, size_t destlen)
	{
	HERODOTUS_READER r;
	HERODOTUS_WRITER w;
	@@ -230,7 +242,8 @@ grapheme_to_uppercase(const uint_least32_t *src, size_t src…
	}

	size_t
	-grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t…
	+grapheme_to_lowercase(const uint_least32_t *src, size_t srclen,
	+ uint_least32_t *dest, size_t destlen)
	{
	HERODOTUS_READER r;
	HERODOTUS_WRITER w;
	@@ -242,7 +255,8 @@ grapheme_to_lowercase(const uint_least32_t *src, size_t src…
	}

	size_t
	-grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t…
	+grapheme_to_titlecase(const uint_least32_t *src, size_t srclen,
	+ uint_least32_t *dest, size_t destlen)
	{
	HERODOTUS_READER r;
	HERODOTUS_WRITER w;
	@@ -254,7 +268,8 @@ grapheme_to_titlecase(const uint_least32_t *src, size_t src…
	}

	size_t
	-grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t …
	+grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest,
	+ size_t destlen)
	{
	HERODOTUS_READER r;
	HERODOTUS_WRITER w;
	@@ -266,7 +281,8 @@ grapheme_to_uppercase_utf8(const char *src, size_t srclen, …
	}

	size_t
	-grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t …
	+grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest,
	+ size_t destlen)
	{
	HERODOTUS_READER r;
	HERODOTUS_WRITER w;
	@@ -278,7 +294,8 @@ grapheme_to_lowercase_utf8(const char *src, size_t srclen, …
	}

	size_t
	-grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t …
	+grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest,
	+ size_t destlen)
	{
	HERODOTUS_READER r;
	HERODOTUS_WRITER w;
	@@ -299,7 +316,8 @@ is_case(HERODOTUS_READER r, const uint_least16_t major,
	uint_least32_t cp;
	int_least32_t map;

	- for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUC…
	+ for (; herodotus_read_codepoint(r, false, &cp) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	/* get and handle case mapping */
	if (unlikely((map = get_case_offset(cp, major, minor)) >=
	INT32_C(0x110000))) {
	@@ -315,7 +333,8 @@ is_case(HERODOTUS_READER r, const uint_least16_t major,
	goto done;
	} else {
	/* move forward */
	- herodotus_read_codepoint(r, tr…
	+ herodotus_read_codepoint(
	+ r, true, &cp);
	}
	} else {
	/*
	@@ -357,7 +376,8 @@ is_titlecase(HERODOTUS_READER r, size_t output)

	for (; (nwb = herodotus_next_word_break(r)) > 0;) {
	herodotus_reader_push_advance_limit(r, nwb);
	- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO…
	+ for (; (s = herodotus_read_codepoint(r, false, &cp)) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	/* check if we have a cased character */
	prop = get_case_property(cp);
	if (prop == CASE_PROP_CASED \|\|
	@@ -384,17 +404,20 @@ is_titlecase(HERODOTUS_READER r, size_t output)
	* we encountered a cased character before the word
	* break, check if it's titlecase
	*/
	- herodotus_reader_push_advance_limit(r,
	- herodotus_reader_next_codepoint_break(r));
	- if (!is_case(r, title_major, title_minor, title_specia…
	+ herodotus_reader_push_advance_limit(
	+ r, herodotus_reader_next_codepoint_break(r));
	+ if (!is_case(r, title_major, title_minor, title_specia…
	+ NULL)) {
	ret = false;
	goto done;
	}
	herodotus_reader_pop_limit(r);
	}

	- /* check if the rest of the codepoints in the word are lowerca…
	- if (!is_case(r, lower_major, lower_minor, lower_special, NULL)…
	+ /* check if the rest of the codepoints in the word are lowerca…
	+ */
	+ if (!is_case(r, lower_major, lower_minor, lower_special,
	+ NULL)) {
	ret = false;
	goto done;
	}
	diff --git a/src/character.c b/src/character.c
	@@ -16,83 +16,80 @@ struct character_break_state {

	static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
	[CHAR_BREAK_PROP_OTHER] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	- [CHAR_BREAK_PROP_CR] =
	- UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ [CHAR_BREAK_PROP_CR] = UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
	[CHAR_BREAK_PROP_EXTEND] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_HANGUL_L] =
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L \| /* GB6 */
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V \| /* GB6 */
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV \| /* GB6 */
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT \| /* GB6 */
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L \| /* GB6 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V \| /* GB6 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV \| /* GB6 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT \| /* GB6 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_HANGUL_V] =
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V \| /* GB7 */
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB7 */
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V \| /* GB7 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB7 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_HANGUL_T] =
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB8 */
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB8 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_HANGUL_LV] =
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V \| /* GB7 */
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB7 */
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V \| /* GB7 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB7 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_HANGUL_LVT] =
	- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB8 */
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T \| /* GB8 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_PREPEND] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK \| /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK \| /* GB9a */
	(UINT16_C(0xFFFF) &
	- ~(UINT16_C(1) << CHAR_BREAK_PROP_CR \|
	- UINT16_C(1) << CHAR_BREAK_PROP_LF \|
	- UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
	- )
	- ), /* GB9b */
	+ ~(UINT16_C(1) << CHAR_BREAK_PROP_CR \|
	+ UINT16_C(1) << CHAR_BREAK_PROP_LF \|
	+ UINT16_C(1) << CHAR_BREAK_PROP_CONTROL)), /* GB9b */
	[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_SPACINGMARK] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	[CHAR_BREAK_PROP_ZWJ] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \| /* GB9 */
	+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
	};
	static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
	[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \|
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \|
	UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
	[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
	UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
	[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
	- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \|
	+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND \|
	UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
	[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
	- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \|
	+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ \|
	UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
	};
	static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
	@@ -113,7 +110,8 @@ get_break_prop(uint_least32_t cp)
	{
	if (likely(cp <= UINT32_C(0x10FFFF))) {
	return (enum char_break_property)
	- char_break_minor[char_break_major[cp >> 8] + (cp & 0xFF…
	+ char_break_minor[char_break_major[cp >> 8] +
	+ (cp & 0xFF)];
	} else {
	return CHAR_BREAK_PROP_OTHER;
	}
	@@ -122,23 +120,27 @@ get_break_prop(uint_least32_t cp)
	static inline void
	state_serialize(const struct character_break_state *in, uint_least16_t *out)
	{
	- *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) \| …
	- (uint_least16_t)(((uint_least16_t)(in->prop_set)) << 8) \| …
	- (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) << 9) \| …
	- (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) << 10); …
	+ *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) \| /* first 8 bits */
	+ (uint_least16_t)(((uint_least16_t)(in->prop_set))
	+ << 8) \| /* 9th bit */
	+ (uint_least16_t)(((uint_least16_t)(in->gb11_flag))
	+ << 9) \| /* 10th bit */
	+ (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag))
	+ << 10); /* 11th bit */
	}

	static inline void
	state_deserialize(uint_least16_t in, struct character_break_state *out)
	{
	- out->prop = in & UINT8_C(0xFF);
	- out->prop_set = in & (UINT16_C(1) << 8);
	- out->gb11_flag = in & (UINT16_C(1) << 9);
	+ out->prop = in & UINT8_C(0xFF);
	+ out->prop_set = in & (UINT16_C(1) << 8);
	+ out->gb11_flag = in & (UINT16_C(1) << 9);
	out->gb12_13_flag = in & (UINT16_C(1) << 10);
	}

	bool
	-grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least…
	+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1,
	+ uint_least16_t *s)
	{
	struct character_break_state state;
	enum char_break_property cp0_prop, cp1_prop;
	@@ -161,23 +163,26 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas…
	/* update flags */
	state.gb11_flag =
	flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
	- state.gb11_flag] &
	+ state.gb11_flag] &
	UINT16_C(1) << cp1_prop;
	state.gb12_13_flag =
	- flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
	- state.gb12_13_flag] &
	- UINT16_C(1) << cp1_prop;
	+ flag_update_gb12_13[cp0_prop +
	+ NUM_CHAR_BREAK_PROPS *
	+ state.gb12_13_flag] &
	+ UINT16_C(1) << cp1_prop;

	/*
	* Apply grapheme cluster breaking algorithm (UAX #29), see
	* http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_…
	*/
	notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) …
	- (dont_break_gb11[cp0_prop + state.gb11_flag *
	- NUM_CHAR_BREAK_PROPS] &
	+ (dont_break_gb11[cp0_prop +
	+ state.gb11_flag *
	+ NUM_CHAR_BREAK_PROPS] &
	(UINT16_C(1) << cp1_prop)) \|\|
	- (dont_break_gb12_13[cp0_prop + state.gb12_13_flag *
	- NUM_CHAR_BREAK_PROPS] &
	+ (dont_break_gb12_13[cp0_prop +
	+ state.gb12_13_flag *
	+ NUM_CHAR_BREAK_PROPS] &
	(UINT16_C(1) << cp1_prop));

	/* update or reset flags (when we have a break) */
	@@ -198,8 +203,10 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least…
	* were all set to false
	*/
	notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) …
	- (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_pr…
	- (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1…
	+ (dont_break_gb11[cp0_prop] &
	+ (UINT16_C(1) << cp1_prop)) \|\|
	+ (dont_break_gb12_13[cp0_prop] &
	+ (UINT16_C(1) << cp1_prop));
	}

	return !notbreak;
	@@ -212,7 +219,8 @@ next_character_break(HERODOTUS_READER *r)
	uint_least32_t cp0 = 0, cp1 = 0;

	for (herodotus_read_codepoint(r, true, &cp0);
	- herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCC…
	+ herodotus_read_codepoint(r, false, &cp1) ==
	+ HERODOTUS_STATUS_SUCCESS;
	herodotus_read_codepoint(r, true, &cp0)) {
	if (grapheme_is_character_break(cp0, cp1, &state)) {
	break;
	diff --git a/src/line.c b/src/line.c
	@@ -11,7 +11,8 @@ get_break_prop(uint_least32_t cp)
	{
	if (likely(cp <= UINT32_C(0x10FFFF))) {
	return (enum line_break_property)
	- line_break_minor[line_break_major[cp >> 8] + (cp & 0xff…
	+ line_break_minor[line_break_major[cp >> 8] +
	+ (cp & 0xff)];
	} else {
	return LINE_BREAK_PROP_AL;
	}
	@@ -22,7 +23,7 @@ next_line_break(HERODOTUS_READER *r)
	{
	HERODOTUS_READER tmp;
	enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop,
	- last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
	+ last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
	uint_least32_t cp;
	uint_least8_t lb25_level = 0;
	bool lb21a_flag = false, ri_even = true;
	@@ -43,8 +44,10 @@ next_line_break(HERODOTUS_READER *r)
	last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */
	last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS;

	- for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop…
	- herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCE…
	+ for (herodotus_read_codepoint(r, true, &cp),
	+ cp0_prop = get_break_prop(cp);
	+ herodotus_read_codepoint(r, false, &cp) ==
	+ HERODOTUS_STATUS_SUCCESS;
	herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) {
	/* get property of the right codepoint */
	cp1_prop = get_break_prop(cp);
	@@ -59,10 +62,11 @@ next_line_break(HERODOTUS_READER *r)
	cp0_prop != LINE_BREAK_PROP_ZWJ) {
	/*
	* check if the property we are overwriting now is an
	- * HL. If so, we set the LB21a-flag which depends on t…
	- * knowledge.
	+ * HL. If so, we set the LB21a-flag which depends on
	+ * this knowledge.
	*/
	- lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PR…
	+ lb21a_flag =
	+ (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL…

	/* check regional indicator state */
	if (cp0_prop == LINE_BREAK_PROP_RI) {
	@@ -109,8 +113,7 @@ next_line_break(HERODOTUS_READER *r)
	* and one (CL \| CP) to the left of the middle
	* spot
	*/
	- if ((lb25_level == 0 \|\|
	- lb25_level == 1) &&
	+ if ((lb25_level == 0 \|\| lb25_level == 1) &&
	cp0_prop == LINE_BREAK_PROP_NU) {
	/* sequence has begun */
	lb25_level = 1;
	@@ -118,12 +121,15 @@ next_line_break(HERODOTUS_READER *r)
	(cp0_prop == LINE_BREAK_PROP_NU \|\|
	cp0_prop == LINE_BREAK_PROP_SY \|\|
	cp0_prop == LINE_BREAK_PROP_IS)) {
	- /* (NU \| SY \| IS) sequence begins or continued…
	+ /* (NU \| SY \| IS) sequence begins or continued
	+ */
	lb25_level = 2;
	- } else if ((lb25_level == 1 \|\| lb25_level == 2) &&
	- (cp0_prop == LINE_BREAK_PROP_CL …
	- cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW…
	- cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HW…
	+ } else if (
	+ (lb25_level == 1 \|\| lb25_level == 2) &&
	+ (cp0_prop == LINE_BREAK_PROP_CL \|\|
	+ cp0_prop ==
	+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF \|\|
	+ cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF))…
	/* CL or CP at the end of the sequence */
	lb25_level = 3;
	} else {
	@@ -229,17 +235,19 @@ next_line_break(HERODOTUS_READER *r)
	/* LB13 (affected by tailoring for LB25, see example 7) */
	if (cp1_prop == LINE_BREAK_PROP_EX \|\|
	(last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU &&
	- (cp1_prop == LINE_BREAK_PROP_CL \|\|
	+ (cp1_prop == LINE_BREAK_PROP_CL \|\|
	cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF \|\|
	- cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF \|\|
	- cp1_prop == LINE_BREAK_PROP_IS \|\|
	+ cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF \|\|
	+ cp1_prop == LINE_BREAK_PROP_IS \|\|
	cp1_prop == LINE_BREAK_PROP_SY))) {
	continue;
	}

	/* LB14 */
	- if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_E…
	- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_…
	+ if (last_non_sp_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF \|\|
	+ last_non_sp_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
	continue;
	}

	@@ -251,9 +259,11 @@ next_line_break(HERODOTUS_READER *r)
	}

	/* LB16 */
	- if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL …
	- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_…
	- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW…
	+ if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL \|\|
	+ last_non_sp_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF \|\|
	+ last_non_sp_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
	cp1_prop == LINE_BREAK_PROP_NS) {
	continue;
	}
	@@ -308,7 +318,7 @@ next_line_break(HERODOTUS_READER *r)
	}

	/* LB23 */
	- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
	cp1_prop == LINE_BREAK_PROP_NU) {
	continue;
	@@ -336,11 +346,11 @@ next_line_break(HERODOTUS_READER *r)
	/* LB24 */
	if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR \|\|
	last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) &&
	- (cp1_prop == LINE_BREAK_PROP_AL \|\|
	+ (cp1_prop == LINE_BREAK_PROP_AL \|\|
	cp1_prop == LINE_BREAK_PROP_HL)) {
	continue;
	}
	- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
	(cp1_prop == LINE_BREAK_PROP_PR \|\|
	cp1_prop == LINE_BREAK_PROP_PO)) {
	@@ -362,32 +372,33 @@ next_line_break(HERODOTUS_READER *r)
	herodotus_reader_copy(r, &tmp);
	herodotus_read_codepoint(&tmp, true, &cp);
	if (herodotus_read_codepoint(&tmp, true, &cp) ==
	- HERODOTUS_STATUS_SUCCESS &&
	+ HERODOTUS_STATUS_SUCCESS &&
	(cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF \|\|
	- cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF \|\|
	+ cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF \|\|
	cp1_prop == LINE_BREAK_PROP_HY)) {
	if (get_break_prop(cp) == LINE_BREAK_PROP_NU) {
	continue;
	}
	}
	}
	- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW…
	- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HW…
	+ if ((last_non_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF \|\|
	+ last_non_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_OP_WITH_EAW_HWF \|\|
	last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) &&
	cp1_prop == LINE_BREAK_PROP_NU) {
	continue;
	}
	- if (lb25_level == 1 &&
	- (cp1_prop == LINE_BREAK_PROP_NU \|\|
	- cp1_prop == LINE_BREAK_PROP_SY \|\|
	- cp1_prop == LINE_BREAK_PROP_IS)) {
	+ if (lb25_level == 1 && (cp1_prop == LINE_BREAK_PROP_NU \|\|
	+ cp1_prop == LINE_BREAK_PROP_SY \|\|
	+ cp1_prop == LINE_BREAK_PROP_IS)) {
	continue;
	}
	if ((lb25_level == 1 \|\| lb25_level == 2) &&
	- (cp1_prop == LINE_BREAK_PROP_NU \|\|
	- cp1_prop == LINE_BREAK_PROP_SY \|\|
	- cp1_prop == LINE_BREAK_PROP_IS \|\|
	- cp1_prop == LINE_BREAK_PROP_CL \|\|
	+ (cp1_prop == LINE_BREAK_PROP_NU \|\|
	+ cp1_prop == LINE_BREAK_PROP_SY \|\|
	+ cp1_prop == LINE_BREAK_PROP_IS \|\|
	+ cp1_prop == LINE_BREAK_PROP_CL \|\|
	cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF \|\|
	cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
	continue;
	@@ -437,37 +448,37 @@ next_line_break(HERODOTUS_READER *r)
	}

	/* LB28 */
	- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
	- (cp1_prop == LINE_BREAK_PROP_AL \|\|
	+ (cp1_prop == LINE_BREAK_PROP_AL \|\|
	cp1_prop == LINE_BREAK_PROP_HL)) {
	continue;
	}

	/* LB29 */
	if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS &&
	- (cp1_prop == LINE_BREAK_PROP_AL \|\|
	+ (cp1_prop == LINE_BREAK_PROP_AL \|\|
	cp1_prop == LINE_BREAK_PROP_HL)) {
	continue;
	}

	/* LB30 */
	- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL \|\|
	+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL \|\|
	+ last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL \|\|
	last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) &&
	cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) {
	continue;
	}
	- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_…
	- (cp1_prop == LINE_BREAK_PROP_AL \|\|
	- cp1_prop == LINE_BREAK_PROP_HL \|\|
	+ if (last_non_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
	+ (cp1_prop == LINE_BREAK_PROP_AL \|\|
	+ cp1_prop == LINE_BREAK_PROP_HL \|\|
	cp1_prop == LINE_BREAK_PROP_NU)) {
	continue;
	}

	/* LB30a */
	- if (!ri_even &&
	- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
	+ if (!ri_even && last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI …
	cp1_prop == LINE_BREAK_PROP_RI) {
	continue;
	}
	@@ -477,7 +488,8 @@ next_line_break(HERODOTUS_READER *r)
	cp1_prop == LINE_BREAK_PROP_EM) {
	continue;
	}
	- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT…
	+ if (last_non_cm_or_zwj_prop ==
	+ LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
	cp1_prop == LINE_BREAK_PROP_EM) {
	continue;
	}
	diff --git a/src/sentence.c b/src/sentence.c
	@@ -6,8 +6,7 @@
	#include "../grapheme.h"
	#include "util.h"

	-struct sentence_break_state
	-{
	+struct sentence_break_state {
	uint_least8_t aterm_close_sp_level;
	uint_least8_t saterm_close_sp_parasep_level;
	};
	@@ -17,8 +16,8 @@ get_sentence_break_prop(uint_least32_t cp)
	{
	if (likely(cp <= UINT32_C(0x10FFFF))) {
	return (uint_least8_t)
	- sentence_break_minor[sentence_break_major[cp >> 8] +
	- (cp & 0xff)];
	+ sentence_break_minor[sentence_break_major[cp >> 8] +
	+ (cp & 0xff)];
	} else {
	return SENTENCE_BREAK_PROP_OTHER;
	}
	@@ -80,7 +79,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
	state->aterm_close_sp_level = 2;
	} else if ((state->aterm_close_sp_level == 1 \|\|
	state->aterm_close_sp_level == 2 \|\|
	- state->aterm_close_sp_level == 3) &&
	+ state->aterm_close_sp_level == 3) &&
	prop == SENTENCE_BREAK_PROP_SP) {
	/* sp-sequence begins or continued */
	state->aterm_close_sp_level = 3;
	@@ -102,7 +101,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
	state->saterm_close_sp_parasep_level = 2;
	} else if ((state->saterm_close_sp_parasep_level == 1 \|\|
	state->saterm_close_sp_parasep_level == 2 \|\|
	- state->saterm_close_sp_parasep_level == 3) &&
	+ state->saterm_close_sp_parasep_level == 3) &&
	prop == SENTENCE_BREAK_PROP_SP) {
	/* sp-sequence begins or continued */
	state->saterm_close_sp_parasep_level = 3;
	@@ -110,7 +109,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
	state->saterm_close_sp_parasep_level == 2 \|\|
	state->saterm_close_sp_parasep_level == 3) &&
	(prop == SENTENCE_BREAK_PROP_SEP \|\|
	- prop == SENTENCE_BREAK_PROP_CR \|\|
	+ prop == SENTENCE_BREAK_PROP_CR \|\|
	prop == SENTENCE_BREAK_PROP_LF)) {
	/* ParaSep at the end of the sequence */
	state->saterm_close_sp_parasep_level = 4;
	@@ -146,7 +145,7 @@ next_sentence_break(HERODOTUS_READER *r)

	/* SB4 */
	if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_SEP \|\|
	- p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR \|\|
	+ p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR \|\|
	p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_LF) {
	break;
	}
	@@ -179,7 +178,8 @@ next_sentence_break(HERODOTUS_READER *r)
	* This is the most complicated rule, requiring
	* the right-hand-side to satisfy the regular expressi…
	*
	- * ( ¬(OLetter \| Upper \| Lower \| ParaSep \| SATerm) )…
	+ * ( ¬(OLetter \| Upper \| Lower \| ParaSep \| SATerm) )*
	+ * Lower
	*
	* which we simply check "manually" given LUT-lookups
	* are very cheap by starting at the mid_reader.
	@@ -198,12 +198,12 @@ next_sentence_break(HERODOTUS_READER *r)
	* match the following condition
	*/
	if (prop == SENTENCE_BREAK_PROP_OLETTER \|\|
	- prop == SENTENCE_BREAK_PROP_UPPER \|\|
	- prop == SENTENCE_BREAK_PROP_LOWER \|\|
	- prop == SENTENCE_BREAK_PROP_SEP \|\|
	- prop == SENTENCE_BREAK_PROP_CR \|\|
	- prop == SENTENCE_BREAK_PROP_LF \|\|
	- prop == SENTENCE_BREAK_PROP_STERM \|\|
	+ prop == SENTENCE_BREAK_PROP_UPPER \|\|
	+ prop == SENTENCE_BREAK_PROP_LOWER \|\|
	+ prop == SENTENCE_BREAK_PROP_SEP \|\|
	+ prop == SENTENCE_BREAK_PROP_CR \|\|
	+ prop == SENTENCE_BREAK_PROP_LF \|\|
	+ prop == SENTENCE_BREAK_PROP_STERM \|\|
	prop == SENTENCE_BREAK_PROP_ATERM) {
	break;
	}
	@@ -219,8 +219,8 @@ next_sentence_break(HERODOTUS_READER *r)
	state.saterm_close_sp_parasep_level == 2 \|\|
	state.saterm_close_sp_parasep_level == 3) &&
	(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SCONTINUE \|\|
	- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM \|\|
	- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
	+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM \|\|
	+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
	continue;
	}

	@@ -228,9 +228,9 @@ next_sentence_break(HERODOTUS_READER *r)
	if ((state.saterm_close_sp_parasep_level == 1 \|\|
	state.saterm_close_sp_parasep_level == 2) &&
	(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CLOSE \|\|
	- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP \|\|
	- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP \|\|
	- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR \|\|
	+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP \|\|
	+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP \|\|
	+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR \|\|
	p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
	continue;
	}
	@@ -239,9 +239,9 @@ next_sentence_break(HERODOTUS_READER *r)
	if ((state.saterm_close_sp_parasep_level == 1 \|\|
	state.saterm_close_sp_parasep_level == 2 \|\|
	state.saterm_close_sp_parasep_level == 3) &&
	- (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP \|\|
	+ (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP \|\|
	p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP \|\|
	- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR \|\|
	+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR \|\|
	p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
	continue;
	}
	diff --git a/src/utf8.c b/src/utf8.c
	@@ -9,14 +9,14 @@

	/* lookup-table for the types of sequence first bytes */
	static const struct {
	- uint_least8_t lower; /* lower bound of sequence first byte */
	- uint_least8_t upper; /* upper bound of sequence first byte */
	+ uint_least8_t lower; /* lower bound of sequence first byte */
	+ uint_least8_t upper; /* upper bound of sequence first byte */
	uint_least32_t mincp; /* smallest non-overlong encoded codepoint */
	uint_least32_t maxcp; /* largest encodable codepoint */
	- /*
	- * implicit: table-offset represents the number of following
	- * bytes of the form 10xxxxxx (6 bits capacity each)
	- */
	+ /*
	+ * implicit: table-offset represents the number …
	+ * bytes of the form 10xxxxxx (6 bits capacity e…
	+ */
	} lut[] = {
	[0] = {
	/* 0xxxxxxx */
	@@ -104,8 +104,8 @@ grapheme_decode_utf8(const char *str, size_t len, uint_leas…
	* sequence starter occurs right before a NUL-byte.
	*/
	for (i = 0; 1 + i < len; i++) {
	- if(!BETWEEN(((const unsigned char *)str)[1 + i],
	- 0x80, 0xBF)) {
	+ if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80,
	+ 0xBF)) {
	break;
	}
	}
	@@ -124,7 +124,7 @@ grapheme_decode_utf8(const char *str, size_t len, uint_leas…
	* (i.e. between 0x80 (10000000) and 0xBF (10111111))
	*/
	for (i = 1; i <= off; i++) {
	- if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
	+ if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
	/*
	* byte does not match format; return
	* number of bytes processed excluding the
	@@ -201,8 +201,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t l…
	* We do not overwrite the mask because we guaranteed earlier
	* that there are no bits higher than the mask allows.
	*/
	- ((unsigned char *)str)[0] = lut[off].lower \|
	- (uint_least8_t)(cp >> (6 * off));
	+ ((unsigned char *)str)[0] =
	+ lut[off].lower \| (uint_least8_t)(cp >> (6 * off));

	for (i = 1; i <= off; i++) {
	/*
	@@ -211,8 +211,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t l…
	* extract from the properly-shifted value using the
	* mask 00111111 (0x3F)
	*/
	- ((unsigned char *)str)[i] = 0x80 \|
	- ((cp >> (6 * (off - i))) & 0x3F);
	+ ((unsigned char *)str)[i] =
	+ 0x80 \| ((cp >> (6 * (off - i))) & 0x3F);
	}

	return 1 + off;
	diff --git a/src/util.c b/src/util.c
	@@ -37,16 +37,20 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTU…
	*/
	dest->type = src->type;
	if (src->type == HERODOTUS_TYPE_CODEPOINT) {
	- dest->src = (src->src == NULL) ? NULL :
	- ((const uint_least32_t *)(src->src)) + src->off;
	+ dest->src =
	+ (src->src == NULL) ?
	+ NULL :
	+ ((const uint_least32_t *)(src->src)) + src->of…
	} else { /* src->type == HERODOTUS_TYPE_UTF8 */
	- dest->src = (src->src == NULL) ? NULL :
	- ((const char *)(src->src)) + src->off;
	+ dest->src = (src->src == NULL) ?
	+ NULL :
	+ ((const char *)(src->src)) + src->off;
	}
	if (src->srclen == SIZE_MAX) {
	dest->srclen = SIZE_MAX;
	} else {
	- dest->srclen = (src->off < src->srclen) ? src->srclen - src->o…
	+ dest->srclen =
	+ (src->off < src->srclen) ? src->srclen - src->off : 0;
	}
	dest->off = 0;
	dest->terminated_by_null = src->terminated_by_null;
	@@ -62,8 +66,10 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS…
	* to release the limit and, instead, we just
	* prevent any more reads
	*/
	- dest->soft_limit[i] = (src->off < src->soft_limit[i]) ?
	- src->soft_limit[i] - src->off : 0;
	+ dest->soft_limit[i] =
	+ (src->off < src->soft_limit[i]) ?
	+ src->soft_limit[i] - src->off :
	+ 0;
	}
	}
	}
	@@ -141,9 +147,9 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance,…
	*cp = ((const uint_least32_t *)(r->src))[r->off];
	ret = 1;
	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
	- ret = grapheme_decode_utf8((const char *)r->src + r->off,
	- MIN(r->srclen, r->soft_limit[0]) -
	- r->off, cp);
	+ ret = grapheme_decode_utf8(
	+ (const char *)r->src + r->off,
	+ MIN(r->srclen, r->soft_limit[0]) - r->off, cp);
	}

	if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
	@@ -176,8 +182,8 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance,…
	}

	void
	-herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
	- void *dest, size_t destlen)
	+herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, void *des…
	+ size_t destlen)
	{
	w->type = type;
	w->dest = dest;
	@@ -212,8 +218,8 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
	* (the last case meaning truncation).
	*/
	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
	- ((uint_least32_t *)(w->dest))
	- [w->first_unwritable_offset] = 0;
	+ ((uint_least32_t
	+ *)(w->dest))[w->first_unwritable_offset] = 0;
	} else { /* w->type == HERODOTUS_TYPE_UTF8 */
	((char *)(w->dest))[w->first_unwritable_offset] = '\0';
	}
	@@ -226,8 +232,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
	* byte.
	*/
	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
	- ((uint_least32_t *)(w->dest))
	- [w->destlen - 1] = 0;
	+ ((uint_least32_t *)(w->dest))[w->destlen - 1] = 0;
	} else { /* w->type == HERODOTUS_TYPE_UTF8 */
	((char *)(w->dest))[w->destlen - 1] = '\0';
	}
	@@ -267,8 +272,8 @@ herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32…

	if (w->dest != NULL && w->off + ret < w->destlen) {
	/* we still have enough room in the buffer */
	- grapheme_encode_utf8(cp, (char *)(w->dest) +
	- w->off, w->destlen - w->off);
	+ grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
	+ w->destlen - w->off);
	} else if (w->first_unwritable_offset == SIZE_MAX) {
	/*
	* the first unwritable offset has not been
	@@ -328,8 +333,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_le…

	/* fill in the two next raw properties (after no-initialization) */
	p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
	- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, …
	- HERODOTUS_STATUS_SUCCESS; ) {
	+ for (i = 0;
	+ i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	p->raw.next_prop[i++] = p->get_break_prop(cp);
	}

	@@ -338,8 +344,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_le…

	/* fill in the two next skip properties (after no-initialization) */
	p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
	- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true,…
	- HERODOTUS_STATUS_SUCCESS; ) {
	+ for (i = 0;
	+ i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
	+ HERODOTUS_STATUS_SUCCESS;) {
	prop = p->get_break_prop(cp);
	if (!p->is_skippable_prop(prop)) {
	p->skip.next_prop[i++] = prop;
	diff --git a/src/util.h b/src/util.h
	@@ -10,25 +10,25 @@
	#include "../grapheme.h"

	#undef MIN
	-#define MIN(x,y) ((x) < (y) ? (x) : (y))
	+#define MIN(x, y) ((x) < (y) ? (x) : (y))
	#undef MAX
	-#define MAX(x,y) ((x) > (y) ? (x) : (y))
	+#define MAX(x, y) ((x) > (y) ? (x) : (y))
	#undef LEN
	#define LEN(x) (sizeof(x) / sizeof(*(x)))

	#undef likely
	#undef unlikely
	#ifdef __has_builtin
	- #if __has_builtin(__builtin_expect)
	- #define likely(expr) __builtin_expect(!!(expr), 1)
	- #define unlikely(expr) __builtin_expect(!!(expr), 0)
	- #else
	- #define likely(expr) (expr)
	- #define unlikely(expr) (expr)
	- #endif
	+#if __has_builtin(__builtin_expect)
	+#define likely(expr) __builtin_expect(!!(expr), 1)
	+#define unlikely(expr) __builtin_expect(!!(expr), 0)
	#else
	- #define likely(expr) (expr)
	- #define unlikely(expr) (expr)
	+#define likely(expr) (expr)
	+#define unlikely(expr) (expr)
	+#endif
	+#else
	+#define likely(expr) (expr)
	+#define unlikely(expr) (expr)
	#endif

	/*
	@@ -84,6 +84,7 @@ struct proper {
	uint_least8_t prev_prop[2];
	uint_least8_t next_prop[2];
	} raw, skip;
	+
	HERODOTUS_READER mid_reader, raw_reader, skip_reader;
	void *state;
	uint_least8_t no_prop;
	@@ -100,7 +101,8 @@ void herodotus_reader_pop_limit(HERODOTUS_READER *);
	size_t herodotus_reader_number_read(const HERODOTUS_READER *);
	size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
	size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
	-enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_…
	+enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool,
	+ uint_least32_t *);

	void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
	size_t);
	diff --git a/src/word.c b/src/word.c
	@@ -6,8 +6,7 @@
	#include "../grapheme.h"
	#include "util.h"

	-struct word_break_state
	-{
	+struct word_break_state {
	bool ri_even;
	};

	@@ -16,7 +15,8 @@ get_word_break_prop(uint_least32_t cp)
	{
	if (likely(cp <= UINT32_C(0x10FFFF))) {
	return (uint_least8_t)
	- word_break_minor[word_break_major[cp >> 8] + (cp & 0xff…
	+ word_break_minor[word_break_major[cp >> 8] +
	+ (cp & 0xff)];
	} else {
	return WORD_BREAK_PROP_OTHER;
	}
	@@ -26,8 +26,7 @@ static bool
	is_skippable_word_prop(uint_least8_t prop)
	{
	return prop == WORD_BREAK_PROP_EXTEND \|\|
	- prop == WORD_BREAK_PROP_FORMAT \|\|
	- prop == WORD_BREAK_PROP_ZWJ;
	+ prop == WORD_BREAK_PROP_FORMAT \|\| prop == WORD_BREAK_PROP_ZWJ;
	}

	static void
	@@ -79,22 +78,24 @@ next_word_break(HERODOTUS_READER *r)

	/* WB3a */
	if (p.raw.prev_prop[0] == WORD_BREAK_PROP_NEWLINE \|\|
	- p.raw.prev_prop[0] == WORD_BREAK_PROP_CR \|\|
	+ p.raw.prev_prop[0] == WORD_BREAK_PROP_CR \|\|
	p.raw.prev_prop[0] == WORD_BREAK_PROP_LF) {
	break;
	}

	/* WB3b */
	if (p.raw.next_prop[0] == WORD_BREAK_PROP_NEWLINE \|\|
	- p.raw.next_prop[0] == WORD_BREAK_PROP_CR \|\|
	+ p.raw.next_prop[0] == WORD_BREAK_PROP_CR \|\|
	p.raw.next_prop[0] == WORD_BREAK_PROP_LF) {
	break;
	}

	/* WB3c */
	if (p.raw.prev_prop[0] == WORD_BREAK_PROP_ZWJ &&
	- (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTENDED_PICTOGRAPH…
	- p.raw.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPIC…
	+ (p.raw.next_prop[0] ==
	+ WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC \|\|
	+ p.raw.next_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) {
	continue;
	}

	@@ -112,37 +113,43 @@ next_word_break(HERODOTUS_READER *r)
	}

	/* WB5 */
	- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.prev_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
	- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.next_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
	continue;
	}

	/* WB6 */
	- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.prev_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
	- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER \|\|
	- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER \|\|
	+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
	- (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER …
	- p.skip.next_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.next_prop[1] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
	continue;
	}

	/* WB7 */
	- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER \|\|
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER \|\|
	+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
	- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.next_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
	- (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER …
	- p.skip.prev_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.prev_prop[1] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
	continue;
	}
	@@ -174,8 +181,9 @@ next_word_break(HERODOTUS_READER *r)
	}

	/* WB9 */
	- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.prev_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
	p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) {
	continue;
	@@ -183,15 +191,16 @@ next_word_break(HERODOTUS_READER *r)

	/* WB10 */
	if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
	- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.next_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
	continue;
	}

	/* WB11 */
	- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM \|\|
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM \|\|
	+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
	p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC &&
	p.skip.prev_prop[1] == WORD_BREAK_PROP_NUMERIC) {
	@@ -200,8 +209,8 @@ next_word_break(HERODOTUS_READER *r)

	/* WB12 */
	if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
	- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM \|\|
	- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM \|\|
	+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET \|\|
	p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
	p.skip.next_prop[1] == WORD_BREAK_PROP_NUMERIC) {
	continue;
	@@ -214,11 +223,12 @@ next_word_break(HERODOTUS_READER *r)
	}

	/* WB13a */
	- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER …
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC …
	- p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA …
	+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.prev_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	+ p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER \|\|
	+ p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC \|\|
	+ p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA \|\|
	p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) &&
	p.skip.next_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) {
	continue;
	@@ -226,10 +236,11 @@ next_word_break(HERODOTUS_READER *r)

	/* WB13b */
	if (p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET &&
	- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER …
	- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPI…
	- p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER …
	- p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC …
	+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER \|\|
	+ p.skip.next_prop[0] ==
	+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT \|\|
	+ p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER \|\|
	+ p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC \|\|
	p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA)) {
	continue;
	}
	diff --git a/test/bidirectional.c b/test/bidirectional.c
	@@ -25,14 +25,16 @@ main(int argc, char *argv[])

	for (i = 0, failed = 0; i < LEN(bidirectional_test); i++) {
	/*if (i != 490798)
	- continue;*/
	+ continue;*/

	for (m = 0; m < bidirectional_test[i].modelen; m++) {
	ret = grapheme_get_bidirectional_embedding_levels(
	- bidirectional_test[i].cp, bidirectional_test[i…
	+ bidirectional_test[i].cp,
	+ bidirectional_test[i].cplen,
	bidirectional_test[i].mode[m], lev, levlen);

	- if (ret != bidirectional_test[i].cplen \|\| ret > levlen…
	+ if (ret != bidirectional_test[i].cplen \|\|
	+ ret > levlen) {
	goto err;
	}

	@@ -43,18 +45,22 @@ main(int argc, char *argv[])
	}
	continue;
	err:
	- fprintf(stderr, "%s: Failed conformance test %zu (mode…
	+ fprintf(stderr,
	+ "%s: Failed conformance test %zu (mode %i) [",
	argv[0], i, bidirectional_test[i].mode[m]);
	for (j = 0; j < bidirectional_test[i].cplen; j++) {
	- fprintf(stderr, " 0x%04" PRIXLEAST32, bidirect…
	+ fprintf(stderr, " 0x%04" PRIXLEAST32,
	+ bidirectional_test[i].cp[j]);
	}
	fprintf(stderr, " ],\n\tgot (");
	for (j = 0; j < ret; j++) {
	- fprintf(stderr, " %" PRIdLEAST8, (int_least8_t…
	+ fprintf(stderr, " %" PRIdLEAST8,
	+ (int_least8_t)lev[j]);
	}
	fprintf(stderr, " ),\n\texpected (");
	for (j = 0; j < ret; j++) {
	- fprintf(stderr, " %" PRIdLEAST8, bidirectional…
	+ fprintf(stderr, " %" PRIdLEAST8,
	+ bidirectional_test[i].level[j]);
	}
	fprintf(stderr, " ).\n");
	failed++;
	diff --git a/test/case.c b/test/case.c
	@@ -9,10 +9,12 @@

	struct unit_test_is_case_utf8 {
	const char *description;
	+
	struct {
	const char *src;
	size_t srclen;
	} input;
	+
	struct {
	bool ret;
	size_t caselen;
	@@ -21,11 +23,13 @@ struct unit_test_is_case_utf8 {

	struct unit_test_to_case_utf8 {
	const char *description;
	+
	struct {
	const char *src;
	size_t srclen;
	size_t destlen;
	} input;
	+
	struct {
	const char *dest;
	size_t ret;
	@@ -35,57 +39,69 @@ struct unit_test_to_case_utf8 {
	static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
	{
	.description = "empty input",
	- .input = { "", 0 },
	+ .input = { "", 0 },
	.output = { true, 0 },
	},
	{
	.description = "one character, violation",
	- .input = { "A", 1 },
	+ .input = { "A", 1 },
	.output = { false, 0 },
	},
	{
	.description = "one character, confirmation",
	- .input = { "\xC3\x9F", 2 },
	+ .input = { "\xC3\x9F", 2 },
	.output = { true, 2 },
	},
	{
	.description = "one character, violation, NUL-terminated",
	- .input = { "A", SIZE_MAX },
	+ .input = { "A", SIZE_MAX },
	.output = { false, 0 },
	},
	{
	.description = "one character, confirmation, NUL-terminated",
	- .input = { "\xC3\x9F", SIZE_MAX },
	+ .input = { "\xC3\x9F", SIZE_MAX },
	.output = { true, 2 },
	},
	{
	.description = "one word, violation",
	- .input = { "Hello", 5 },
	+ .input = { "Hello", 5 },
	.output = { false, 0 },
	},
	{
	.description = "one word, partial confirmation",
	- .input = { "gru" "\xC3\x9F" "fOrmel", 11 },
	+ .input = { "gru"
	+ "\xC3\x9F"
	+ "fOrmel",
	+ 11 },
	.output = { false, 6 },
	},
	{
	.description = "one word, full confirmation",
	- .input = { "gru" "\xC3\x9F" "formel", 11 },
	+ .input = { "gru"
	+ "\xC3\x9F"
	+ "formel",
	+ 11 },
	.output = { true, 11 },
	},
	{
	.description = "one word, violation, NUL-terminated",
	- .input = { "Hello", SIZE_MAX },
	+ .input = { "Hello", SIZE_MAX },
	.output = { false, 0 },
	},
	{
	.description = "one word, partial confirmation, NUL-terminated…
	- .input = { "gru" "\xC3\x9F" "fOrmel", SIZE_MAX },
	+ .input = { "gru"
	+ "\xC3\x9F"
	+ "fOrmel",
	+ SIZE_MAX },
	.output = { false, 6 },
	},
	{
	.description = "one word, full confirmation, NUL-terminated",
	- .input = { "gru" "\xC3\x9F" "formel", SIZE_MAX },
	+ .input = { "gru"
	+ "\xC3\x9F"
	+ "formel",
	+ SIZE_MAX },
	.output = { true, 11 },
	},
	};
	@@ -93,57 +109,63 @@ static const struct unit_test_is_case_utf8 is_lowercase_ut…
	static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
	{
	.description = "empty input",
	- .input = { "", 0 },
	+ .input = { "", 0 },
	.output = { true, 0 },
	},
	{
	.description = "one character, violation",
	- .input = { "\xC3\x9F", 2 },
	+ .input = { "\xC3\x9F", 2 },
	.output = { false, 0 },
	},
	{
	.description = "one character, confirmation",
	- .input = { "A", 1 },
	+ .input = { "A", 1 },
	.output = { true, 1 },
	},
	{
	.description = "one character, violation, NUL-terminated",
	- .input = { "\xC3\x9F", SIZE_MAX },
	+ .input = { "\xC3\x9F", SIZE_MAX },
	.output = { false, 0 },
	},
	{
	.description = "one character, confirmation, NUL-terminated",
	- .input = { "A", SIZE_MAX },
	+ .input = { "A", SIZE_MAX },
	.output = { true, 1 },
	},
	{
	.description = "one word, violation",
	- .input = { "hello", 5 },
	+ .input = { "hello", 5 },
	.output = { false, 0 },
	},
	{
	.description = "one word, partial confirmation",
	- .input = { "GRU" "\xC3\x9F" "formel", 11 },
	+ .input = { "GRU"
	+ "\xC3\x9F"
	+ "formel",
	+ 11 },
	.output = { false, 3 },
	},
	{
	.description = "one word, full confirmation",
	- .input = { "HELLO", 5 },
	+ .input = { "HELLO", 5 },
	.output = { true, 5 },
	},
	{
	.description = "one word, violation, NUL-terminated",
	- .input = { "hello", SIZE_MAX },
	+ .input = { "hello", SIZE_MAX },
	.output = { false, 0 },
	},
	{
	.description = "one word, partial confirmation, NUL-terminated…
	- .input = { "GRU" "\xC3\x9F" "formel", SIZE_MAX },
	+ .input = { "GRU"
	+ "\xC3\x9F"
	+ "formel",
	+ SIZE_MAX },
	.output = { false, 3 },
	},
	{
	.description = "one word, full confirmation, NUL-terminated",
	- .input = { "HELLO", SIZE_MAX },
	+ .input = { "HELLO", SIZE_MAX },
	.output = { true, 5 },
	},
	};
	@@ -151,77 +173,103 @@ static const struct unit_test_is_case_utf8 is_uppercase_…
	static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
	{
	.description = "empty input",
	- .input = { "", 0 },
	+ .input = { "", 0 },
	.output = { true, 0 },
	},
	{
	.description = "one character, violation",
	- .input = { "\xC3\x9F", 2 },
	+ .input = { "\xC3\x9F", 2 },
	.output = { false, 0 },
	},
	{
	.description = "one character, confirmation",
	- .input = { "A", 1 },
	+ .input = { "A", 1 },
	.output = { true, 1 },
	},
	{
	.description = "one character, violation, NUL-terminated",
	- .input = { "\xC3\x9F", SIZE_MAX },
	+ .input = { "\xC3\x9F", SIZE_MAX },
	.output = { false, 0 },
	},
	{
	.description = "one character, confirmation, NUL-terminated",
	- .input = { "A", SIZE_MAX },
	+ .input = { "A", SIZE_MAX },
	.output = { true, 1 },
	},
	{
	.description = "one word, violation",
	- .input = { "hello", 5 },
	+ .input = { "hello", 5 },
	.output = { false, 0 },
	},
	{
	.description = "one word, partial confirmation",
	- .input = { "Gru" "\xC3\x9F" "fOrmel", 11 },
	+ .input = { "Gru"
	+ "\xC3\x9F"
	+ "fOrmel",
	+ 11 },
	.output = { false, 6 },
	},
	{
	.description = "one word, full confirmation",
	- .input = { "Gru" "\xC3\x9F" "formel", 11 },
	+ .input = { "Gru"
	+ "\xC3\x9F"
	+ "formel",
	+ 11 },
	.output = { true, 11 },
	},
	{
	.description = "one word, violation, NUL-terminated",
	- .input = { "hello", SIZE_MAX },
	+ .input = { "hello", SIZE_MAX },
	.output = { false, 0 },
	},
	{
	.description = "one word, partial confirmation, NUL-terminated…
	- .input = { "Gru" "\xC3\x9F" "fOrmel", SIZE_MAX },
	+ .input = { "Gru"
	+ "\xC3\x9F"
	+ "fOrmel",
	+ SIZE_MAX },
	.output = { false, 6 },
	},
	{
	.description = "one word, full confirmation, NUL-terminated",
	- .input = { "Gru" "\xC3\x9F" "formel", SIZE_MAX },
	+ .input = { "Gru"
	+ "\xC3\x9F"
	+ "formel",
	+ SIZE_MAX },
	.output = { true, 11 },
	},
	{
	.description = "multiple words, partial confirmation",
	- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", 18 },
	+ .input = { "Hello Gru"
	+ "\xC3\x9F"
	+ "fOrmel!",
	+ 18 },
	.output = { false, 12 },
	},
	{
	.description = "multiple words, full confirmation",
	- .input = { "Hello Gru" "\xC3\x9F" "formel!", 18 },
	+ .input = { "Hello Gru"
	+ "\xC3\x9F"
	+ "formel!",
	+ 18 },
	.output = { true, 18 },
	},
	{
	- .description = "multiple words, partial confirmation, NUL-term…
	- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", SIZE_MAX },
	+ .description =
	+ "multiple words, partial confirmation, NUL-terminated",
	+ .input = { "Hello Gru"
	+ "\xC3\x9F"
	+ "fOrmel!",
	+ SIZE_MAX },
	.output = { false, 12 },
	},
	{
	- .description = "multiple words, full confirmation, NUL-termina…
	- .input = { "Hello Gru" "\xC3\x9F" "formel!", SIZE_MAX },
	+ .description =
	+ "multiple words, full confirmation, NUL-terminated",
	+ .input = { "Hello Gru"
	+ "\xC3\x9F"
	+ "formel!",
	+ SIZE_MAX },
	.output = { true, 18 },
	},
	};
	@@ -229,72 +277,74 @@ static const struct unit_test_is_case_utf8 is_titlecase_u…
	static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
	{
	.description = "empty input",
	- .input = { "", 0, 10 },
	+ .input = { "", 0, 10 },
	.output = { "", 0 },
	},
	{
	.description = "empty output",
	- .input = { "hello", 5, 0 },
	+ .input = { "hello", 5, 0 },
	.output = { "", 5 },
	},
	{
	.description = "one character, conversion",
	- .input = { "A", 1, 10 },
	+ .input = { "A", 1, 10 },
	.output = { "a", 1 },
	},
	{
	.description = "one character, no conversion",
	- .input = { "\xC3\x9F", 2, 10 },
	+ .input = { "\xC3\x9F", 2, 10 },
	.output = { "\xC3\x9F", 2 },
	},
	{
	.description = "one character, conversion, truncation",
	- .input = { "A", 1, 0 },
	+ .input = { "A", 1, 0 },
	.output = { "", 1 },
	},
	{
	.description = "one character, conversion, NUL-terminated",
	- .input = { "A", SIZE_MAX, 10 },
	+ .input = { "A", SIZE_MAX, 10 },
	.output = { "a", 1 },
	},
	{
	.description = "one character, no conversion, NUL-terminated",
	- .input = { "\xC3\x9F", SIZE_MAX, 10 },
	+ .input = { "\xC3\x9F", SIZE_MAX, 10 },
	.output = { "\xC3\x9F", 2 },
	},
	{
	- .description = "one character, conversion, NUL-terminated, tru…
	- .input = { "A", SIZE_MAX, 0 },
	+ .description =
	+ "one character, conversion, NUL-terminated, truncation…
	+ .input = { "A", SIZE_MAX, 0 },
	.output = { "", 1 },
	},
	{
	.description = "one word, conversion",
	- .input = { "wOrD", 4, 10 },
	+ .input = { "wOrD", 4, 10 },
	.output = { "word", 4 },
	},
	{
	.description = "one word, no conversion",
	- .input = { "word", 4, 10 },
	+ .input = { "word", 4, 10 },
	.output = { "word", 4 },
	},
	{
	.description = "one word, conversion, truncation",
	- .input = { "wOrD", 4, 3 },
	+ .input = { "wOrD", 4, 3 },
	.output = { "wo", 4 },
	},
	{
	.description = "one word, conversion, NUL-terminated",
	- .input = { "wOrD", SIZE_MAX, 10 },
	+ .input = { "wOrD", SIZE_MAX, 10 },
	.output = { "word", 4 },
	},
	{
	.description = "one word, no conversion, NUL-terminated",
	- .input = { "word", SIZE_MAX, 10 },
	+ .input = { "word", SIZE_MAX, 10 },
	.output = { "word", 4 },
	},
	{
	- .description = "one word, conversion, NUL-terminated, truncati…
	- .input = { "wOrD", SIZE_MAX, 3 },
	+ .description =
	+ "one word, conversion, NUL-terminated, truncation",
	+ .input = { "wOrD", SIZE_MAX, 3 },
	.output = { "wo", 4 },
	},
	};
	@@ -302,72 +352,86 @@ static const struct unit_test_to_case_utf8 to_lowercase_u…
	static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
	{
	.description = "empty input",
	- .input = { "", 0, 10 },
	+ .input = { "", 0, 10 },
	.output = { "", 0 },
	},
	{
	.description = "empty output",
	- .input = { "hello", 5, 0 },
	+ .input = { "hello", 5, 0 },
	.output = { "", 5 },
	},
	{
	.description = "one character, conversion",
	- .input = { "\xC3\x9F", 2, 10 },
	+ .input = { "\xC3\x9F", 2, 10 },
	.output = { "SS", 2 },
	},
	{
	.description = "one character, no conversion",
	- .input = { "A", 1, 10 },
	+ .input = { "A", 1, 10 },
	.output = { "A", 1 },
	},
	{
	.description = "one character, conversion, truncation",
	- .input = { "\xC3\x9F", 2, 0 },
	+ .input = { "\xC3\x9F", 2, 0 },
	.output = { "", 2 },
	},
	{
	.description = "one character, conversion, NUL-terminated",
	- .input = { "\xC3\x9F", SIZE_MAX, 10 },
	+ .input = { "\xC3\x9F", SIZE_MAX, 10 },
	.output = { "SS", 2 },
	},
	{
	.description = "one character, no conversion, NUL-terminated",
	- .input = { "A", SIZE_MAX, 10 },
	+ .input = { "A", SIZE_MAX, 10 },
	.output = { "A", 1 },
	},
	{
	- .description = "one character, conversion, NUL-terminated, tru…
	- .input = { "\xC3\x9F", SIZE_MAX, 0 },
	+ .description =
	+ "one character, conversion, NUL-terminated, truncation…
	+ .input = { "\xC3\x9F", SIZE_MAX, 0 },
	.output = { "", 2 },
	},
	{
	.description = "one word, conversion",
	- .input = { "gRu" "\xC3\x9F" "fOrMel", 11, 15 },
	+ .input = { "gRu"
	+ "\xC3\x9F"
	+ "fOrMel",
	+ 11, 15 },
	.output = { "GRUSSFORMEL", 11 },
	},
	{
	.description = "one word, no conversion",
	- .input = { "WORD", 4, 10 },
	+ .input = { "WORD", 4, 10 },
	.output = { "WORD", 4 },
	},
	{
	.description = "one word, conversion, truncation",
	- .input = { "gRu" "\xC3\x9F" "formel", 11, 5 },
	+ .input = { "gRu"
	+ "\xC3\x9F"
	+ "formel",
	+ 11, 5 },
	.output = { "GRUS", 11 },
	},
	{
	.description = "one word, conversion, NUL-terminated",
	- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 15 },
	+ .input = { "gRu"
	+ "\xC3\x9F"
	+ "formel",
	+ SIZE_MAX, 15 },
	.output = { "GRUSSFORMEL", 11 },
	},
	{
	.description = "one word, no conversion, NUL-terminated",
	- .input = { "WORD", SIZE_MAX, 10 },
	+ .input = { "WORD", SIZE_MAX, 10 },
	.output = { "WORD", 4 },
	},
	{
	- .description = "one word, conversion, NUL-terminated, truncati…
	- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 5 },
	+ .description =
	+ "one word, conversion, NUL-terminated, truncation",
	+ .input = { "gRu"
	+ "\xC3\x9F"
	+ "formel",
	+ SIZE_MAX, 5 },
	.output = { "GRUS", 11 },
	},
	};
	@@ -375,102 +439,105 @@ static const struct unit_test_to_case_utf8 to_uppercase…
	static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
	{
	.description = "empty input",
	- .input = { "", 0, 10 },
	+ .input = { "", 0, 10 },
	.output = { "", 0 },
	},
	{
	.description = "empty output",
	- .input = { "hello", 5, 0 },
	+ .input = { "hello", 5, 0 },
	.output = { "", 5 },
	},
	{
	.description = "one character, conversion",
	- .input = { "a", 1, 10 },
	+ .input = { "a", 1, 10 },
	.output = { "A", 1 },
	},
	{
	.description = "one character, no conversion",
	- .input = { "A", 1, 10 },
	+ .input = { "A", 1, 10 },
	.output = { "A", 1 },
	},
	{
	.description = "one character, conversion, truncation",
	- .input = { "a", 1, 0 },
	+ .input = { "a", 1, 0 },
	.output = { "", 1 },
	},
	{
	.description = "one character, conversion, NUL-terminated",
	- .input = { "a", SIZE_MAX, 10 },
	+ .input = { "a", SIZE_MAX, 10 },
	.output = { "A", 1 },
	},
	{
	.description = "one character, no conversion, NUL-terminated",
	- .input = { "A", SIZE_MAX, 10 },
	+ .input = { "A", SIZE_MAX, 10 },
	.output = { "A", 1 },
	},
	{
	- .description = "one character, conversion, NUL-terminated, tru…
	- .input = { "a", SIZE_MAX, 0 },
	+ .description =
	+ "one character, conversion, NUL-terminated, truncation…
	+ .input = { "a", SIZE_MAX, 0 },
	.output = { "", 1 },
	},
	{
	.description = "one word, conversion",
	- .input = { "heLlo", 5, 10 },
	+ .input = { "heLlo", 5, 10 },
	.output = { "Hello", 5 },
	},
	{
	.description = "one word, no conversion",
	- .input = { "Hello", 5, 10 },
	+ .input = { "Hello", 5, 10 },
	.output = { "Hello", 5 },
	},
	{
	.description = "one word, conversion, truncation",
	- .input = { "heLlo", 5, 2 },
	+ .input = { "heLlo", 5, 2 },
	.output = { "H", 5 },
	},
	{
	.description = "one word, conversion, NUL-terminated",
	- .input = { "heLlo", SIZE_MAX, 10 },
	+ .input = { "heLlo", SIZE_MAX, 10 },
	.output = { "Hello", 5 },
	},
	{
	.description = "one word, no conversion, NUL-terminated",
	- .input = { "Hello", SIZE_MAX, 10 },
	+ .input = { "Hello", SIZE_MAX, 10 },
	.output = { "Hello", 5 },
	},
	{
	- .description = "one word, conversion, NUL-terminated, truncati…
	- .input = { "heLlo", SIZE_MAX, 3 },
	+ .description =
	+ "one word, conversion, NUL-terminated, truncation",
	+ .input = { "heLlo", SIZE_MAX, 3 },
	.output = { "He", 5 },
	},
	{
	.description = "two words, conversion",
	- .input = { "heLlo wORLd!", 12, 20 },
	+ .input = { "heLlo wORLd!", 12, 20 },
	.output = { "Hello World!", 12 },
	},
	{
	.description = "two words, no conversion",
	- .input = { "Hello World!", 12, 20 },
	+ .input = { "Hello World!", 12, 20 },
	.output = { "Hello World!", 12 },
	},
	{
	.description = "two words, conversion, truncation",
	- .input = { "heLlo wORLd!", 12, 8 },
	+ .input = { "heLlo wORLd!", 12, 8 },
	.output = { "Hello W", 12 },
	},
	{
	.description = "two words, conversion, NUL-terminated",
	- .input = { "heLlo wORLd!", SIZE_MAX, 20 },
	+ .input = { "heLlo wORLd!", SIZE_MAX, 20 },
	.output = { "Hello World!", 12 },
	},
	{
	.description = "two words, no conversion, NUL-terminated",
	- .input = { "Hello World!", SIZE_MAX, 20 },
	+ .input = { "Hello World!", SIZE_MAX, 20 },
	.output = { "Hello World!", 12 },
	},
	{
	- .description = "two words, conversion, NUL-terminated, truncat…
	- .input = { "heLlo wORLd!", SIZE_MAX, 4 },
	+ .description =
	+ "two words, conversion, NUL-terminated, truncation",
	+ .input = { "heLlo wORLd!", SIZE_MAX, 4 },
	.output = { "Hel", 12 },
	},
	};
	@@ -485,14 +552,14 @@ unit_test_callback_is_case_utf8(const void *t, size_t off…
	size_t caselen = 0x7f;

	if (t == is_lowercase_utf8) {
	- ret = grapheme_is_lowercase_utf8(test->input.src, test->input.…
	- &caselen);
	+ ret = grapheme_is_lowercase_utf8(test->input.src,
	+ test->input.srclen, &caselen);
	} else if (t == is_uppercase_utf8) {
	- ret = grapheme_is_uppercase_utf8(test->input.src, test->input.…
	- &caselen);
	+ ret = grapheme_is_uppercase_utf8(test->input.src,
	+ test->input.srclen, &caselen);
	} else if (t == is_titlecase_utf8) {
	- ret = grapheme_is_titlecase_utf8(test->input.src, test->input.…
	- &caselen);
	+ ret = grapheme_is_titlecase_utf8(test->input.src,
	+ test->input.srclen, &caselen);

	} else {
	goto err;
	@@ -505,10 +572,11 @@ unit_test_callback_is_case_utf8(const void *t, size_t off…

	return 0;
	err:
	- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
	- "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
	- name, off, test->description, ret ? "true" : "false",
	- caselen, test->output.ret ? "true" : "false",
	+ fprintf(stderr,
	+ "%s: %s: Failed unit test %zu \"%s\" "
	+ "(returned (%s, %zu) instead of (%s, %zu)).\n",
	+ argv0, name, off, test->description, ret ? "true" : "false",
	+ caselen, test->output.ret ? "true" : "false",
	test->output.caselen);
	return 1;
	}
	@@ -526,21 +594,25 @@ unit_test_callback_to_case_utf8(const void *t, size_t off…
	memset(buf, 0x7f, LEN(buf));

	if (t == to_lowercase_utf8) {
	- ret = grapheme_to_lowercase_utf8(test->input.src, test->input.…
	- buf, test->input.destlen);
	+ ret = grapheme_to_lowercase_utf8(test->input.src,
	+ test->input.srclen, buf,
	+ test->input.destlen);
	} else if (t == to_uppercase_utf8) {
	- ret = grapheme_to_uppercase_utf8(test->input.src, test->input.…
	- buf, test->input.destlen);
	+ ret = grapheme_to_uppercase_utf8(test->input.src,
	+ test->input.srclen, buf,
	+ test->input.destlen);
	} else if (t == to_titlecase_utf8) {
	- ret = grapheme_to_titlecase_utf8(test->input.src, test->input.…
	- buf, test->input.destlen);
	+ ret = grapheme_to_titlecase_utf8(test->input.src,
	+ test->input.srclen, buf,
	+ test->input.destlen);
	} else {
	goto err;
	}

	/* check results */
	if (ret != test->output.ret \|\|
	- memcmp(buf, test->output.dest, MIN(test->input.destlen, test->outp…
	+ memcmp(buf, test->output.dest,
	+ MIN(test->input.destlen, test->output.ret))) {
	goto err;
	}

	@@ -553,9 +625,10 @@ unit_test_callback_to_case_utf8(const void *t, size_t off,…

	return 0;
	err:
	- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
	- "(returned (\"%.s\", %zu) instead of (\"%.s\", %zu)).\n", ar…
	- name, off, test->description, (int)ret, buf, ret,
	+ fprintf(stderr,
	+ "%s: %s: Failed unit test %zu \"%s\" "
	+ "(returned (\"%.s\", %zu) instead of (\"%.s\", %zu)).\n",
	+ argv0, name, off, test->description, (int)ret, buf, ret,
	(int)test->output.ret, test->output.dest, test->output.ret);
	return 1;
	}
	@@ -565,16 +638,22 @@ main(int argc, char *argv[])
	{
	(void)argc;

	- return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_ut…
	- LEN(is_lowercase_utf8), "grapheme_is_lowercase_u…
	- run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_ut…
	- LEN(is_uppercase_utf8), "grapheme_is_uppercase_u…
	- run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_ut…
	- LEN(is_titlecase_utf8), "grapheme_is_titlecase_u…
	- run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_ut…
	- LEN(to_lowercase_utf8), "grapheme_to_lowercase_u…
	- run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_ut…
	- LEN(to_uppercase_utf8), "grapheme_to_uppercase_u…
	- run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_ut…
	- LEN(to_titlecase_utf8), "grapheme_to_titlecase_u…
	+ return run_unit_tests(unit_test_callback_is_case_utf8,
	+ is_lowercase_utf8, LEN(is_lowercase_utf8),
	+ "grapheme_is_lowercase_utf8", argv[0]) +
	+ run_unit_tests(unit_test_callback_is_case_utf8,
	+ is_uppercase_utf8, LEN(is_uppercase_utf8),
	+ "grapheme_is_uppercase_utf8", argv[0]) +
	+ run_unit_tests(unit_test_callback_is_case_utf8,
	+ is_titlecase_utf8, LEN(is_titlecase_utf8),
	+ "grapheme_is_titlecase_utf8", argv[0]) +
	+ run_unit_tests(unit_test_callback_to_case_utf8,
	+ to_lowercase_utf8, LEN(to_lowercase_utf8),
	+ "grapheme_to_lowercase_utf8", argv[0]) +
	+ run_unit_tests(unit_test_callback_to_case_utf8,
	+ to_uppercase_utf8, LEN(to_uppercase_utf8),
	+ "grapheme_to_uppercase_utf8", argv[0]) +
	+ run_unit_tests(unit_test_callback_to_case_utf8,
	+ to_titlecase_utf8, LEN(to_titlecase_utf8),
	+ "grapheme_to_titlecase_utf8", argv[0]);
	}
	diff --git a/test/character.c b/test/character.c
	@@ -92,12 +92,10 @@ static const struct unit_test_next_break_utf8 next_characte…

	static int
	unit_test_callback_next_character_break(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+ const char *name, const char *argv0)
	{
	- return unit_test_callback_next_break(t, off,
	- grapheme_next_character_break,
	- name, argv0);
	+ return unit_test_callback_next_break(
	+ t, off, grapheme_next_character_break, name, argv0);
	}

	static int
	@@ -105,9 +103,8 @@ unit_test_callback_next_character_break_utf8(const void *t,…
	const char *name,
	const char *argv0)
	{
	- return unit_test_callback_next_break_utf8(t, off,
	- grapheme_next_character_brea…
	- name, argv0);
	+ return unit_test_callback_next_break_utf8(
	+ t, off, grapheme_next_character_break_utf8, name, argv0);
	}

	int
	@@ -116,11 +113,13 @@ main(int argc, char *argv[])
	(void)argc;

	return run_break_tests(grapheme_next_character_break,
	- character_break_test, LEN(character_break_test)…
	+ character_break_test, LEN(character_break_test),
	+ argv[0]) +
	run_unit_tests(unit_test_callback_next_character_break,
	next_character_break, LEN(next_character_break),
	"grapheme_next_character_break", argv[0]) +
	run_unit_tests(unit_test_callback_next_character_break_utf8,
	- next_character_break_utf8, LEN(next_character_br…
	+ next_character_break_utf8,
	+ LEN(next_character_break_utf8),
	"grapheme_next_character_break_utf8", argv[0]);
	}
	diff --git a/test/line.c b/test/line.c
	@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_line_bre…
	};

	static int
	-unit_test_callback_next_line_break(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+unit_test_callback_next_line_break(const void *t, size_t off, const char *name,
	+ const char *argv0)
	{
	- return unit_test_callback_next_break(t, off,
	- grapheme_next_line_break,
	+ return unit_test_callback_next_break(t, off, grapheme_next_line_break,
	name, argv0);
	}

	static int
	unit_test_callback_next_line_break_utf8(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+ const char *name, const char *argv0)
	{
	- return unit_test_callback_next_break_utf8(t, off,
	- grapheme_next_line_break_utf…
	- name, argv0);
	+ return unit_test_callback_next_break_utf8(
	+ t, off, grapheme_next_line_break_utf8, name, argv0);
	}

	int
	@@ -115,9 +111,8 @@ main(int argc, char *argv[])
	{
	(void)argc;

	- return run_break_tests(grapheme_next_line_break,
	- line_break_test, LEN(line_break_test),
	- argv[0]) +
	+ return run_break_tests(grapheme_next_line_break, line_break_test,
	+ LEN(line_break_test), argv[0]) +
	run_unit_tests(unit_test_callback_next_line_break,
	next_line_break, LEN(next_line_break),
	"grapheme_next_line_break", argv[0]) +
	diff --git a/test/sentence.c b/test/sentence.c
	@@ -92,22 +92,18 @@ static const struct unit_test_next_break_utf8 next_sentence…

	static int
	unit_test_callback_next_sentence_break(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+ const char *name, const char *argv0)
	{
	- return unit_test_callback_next_break(t, off,
	- grapheme_next_sentence_break,
	- name, argv0);
	+ return unit_test_callback_next_break(
	+ t, off, grapheme_next_sentence_break, name, argv0);
	}

	static int
	unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+ const char *name, const char *argv…
	{
	- return unit_test_callback_next_break_utf8(t, off,
	- grapheme_next_sentence_break…
	- name, argv0);
	+ return unit_test_callback_next_break_utf8(
	+ t, off, grapheme_next_sentence_break_utf8, name, argv0);
	}

	int
	@@ -116,12 +112,13 @@ main(int argc, char *argv[])
	(void)argc;

	return run_break_tests(grapheme_next_sentence_break,
	- sentence_break_test,
	- LEN(sentence_break_test), argv[0]) +
	+ sentence_break_test, LEN(sentence_break_test),
	+ argv[0]) +
	run_unit_tests(unit_test_callback_next_sentence_break,
	next_sentence_break, LEN(next_sentence_break),
	"grapheme_next_sentence_break", argv[0]) +
	run_unit_tests(unit_test_callback_next_sentence_break_utf8,
	- next_sentence_break_utf8, LEN(next_sentence_brea…
	+ next_sentence_break_utf8,
	+ LEN(next_sentence_break_utf8),
	"grapheme_next_character_break_utf8", argv[0]);
	}
	diff --git a/test/utf8-decode.c b/test/utf8-decode.c
	@@ -8,281 +8,279 @@
	#include "util.h"

	static const struct {
	- char *arr; /* UTF-8 byte sequence */
	- size_t len; /* length of UTF-8 byte sequence */
	- size_t exp_len; /* expected length returned */
	- uint_least32_t exp_cp; /* expected codepoint returned */
	+ char *arr; /* UTF-8 byte sequence */
	+ size_t len; /* length of UTF-8 byte sequence */
	+ size_t exp_len; /* expected length returned */
	+ uint_least32_t exp_cp; /* expected codepoint returned */
	} dec_test[] = {
	{
	/* empty sequence
	- * [ ] ->
	- * INVALID
	- */
	- .arr = NULL,
	- .len = 0,
	+ * [ ] ->
	+ * INVALID
	+ */
	+ .arr = NULL,
	+ .len = 0,
	.exp_len = 0,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid lead byte
	- * [ 11111101 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xFD },
	- .len = 1,
	+ * [ 11111101 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xFD },
	+ .len = 1,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* valid 1-byte sequence
	- * [ 00000001 ] ->
	- * 0000001
	- */
	- .arr = (char *)(unsigned char[]){ 0x01 },
	- .len = 1,
	+ * [ 00000001 ] ->
	+ * 0000001
	+ */
	+ .arr = (char *)(unsigned char[]) { 0x01 },
	+ .len = 1,
	.exp_len = 1,
	- .exp_cp = 0x1,
	+ .exp_cp = 0x1,
	},
	{
	/* valid 2-byte sequence
	- * [ 11000011 10111111 ] ->
	- * 00011111111
	- */
	- .arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
	- .len = 2,
	+ * [ 11000011 10111111 ] ->
	+ * 00011111111
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
	+ .len = 2,
	.exp_len = 2,
	- .exp_cp = 0xFF,
	+ .exp_cp = 0xFF,
	},
	{
	/* invalid 2-byte sequence (second byte missing)
	- * [ 11000011 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xC3 },
	- .len = 1,
	+ * [ 11000011 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xC3 },
	+ .len = 1,
	.exp_len = 2,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 2-byte sequence (second byte malformed)
	- * [ 11000011 11111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xC3, 0xFF },
	- .len = 2,
	+ * [ 11000011 11111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xC3, 0xFF },
	+ .len = 2,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 2-byte sequence (overlong encoded)
	- * [ 11000001 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xC1, 0xBF },
	- .len = 2,
	+ * [ 11000001 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xC1, 0xBF },
	+ .len = 2,
	.exp_len = 2,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* valid 3-byte sequence
	- * [ 11100000 10111111 10111111 ] ->
	- * 0000111111111111
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
	- .len = 3,
	+ * [ 11100000 10111111 10111111 ] ->
	+ * 0000111111111111
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
	+ .len = 3,
	.exp_len = 3,
	- .exp_cp = 0xFFF,
	+ .exp_cp = 0xFFF,
	},
	{
	/* invalid 3-byte sequence (second byte missing)
	- * [ 11100000 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0 },
	- .len = 1,
	+ * [ 11100000 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0 },
	+ .len = 1,
	.exp_len = 3,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 3-byte sequence (second byte malformed)
	- * [ 11100000 01111111 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
	- .len = 3,
	+ * [ 11100000 01111111 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F, 0xBF },
	+ .len = 3,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 3-byte sequence (short string, second byte malforme…
	- * [ 11100000 01111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F },
	- .len = 2,
	+ * [ 11100000 01111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F },
	+ .len = 2,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 3-byte sequence (third byte missing)
	- * [ 11100000 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF },
	- .len = 2,
	+ * [ 11100000 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF },
	+ .len = 2,
	.exp_len = 3,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 3-byte sequence (third byte malformed)
	- * [ 11100000 10111111 01111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
	- .len = 3,
	+ * [ 11100000 10111111 01111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0x7F },
	+ .len = 3,
	.exp_len = 2,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 3-byte sequence (overlong encoded)
	- * [ 11100000 10011111 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
	- .len = 3,
	+ * [ 11100000 10011111 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xE0, 0x9F, 0xBF },
	+ .len = 3,
	.exp_len = 3,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 3-byte sequence (UTF-16 surrogate half)
	- * [ 11101101 10100000 10000000 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
	- .len = 3,
	+ * [ 11101101 10100000 10000000 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xED, 0xA0, 0x80 },
	+ .len = 3,
	.exp_len = 3,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* valid 4-byte sequence
	- * [ 11110011 10111111 10111111 10111111 ] ->
	- * 011111111111111111111
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
	- .len = 4,
	+ * [ 11110011 10111111 10111111 10111111 ] ->
	+ * 011111111111111111111
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF },
	+ .len = 4,
	.exp_len = 4,
	- .exp_cp = UINT32_C(0xFFFFF),
	+ .exp_cp = UINT32_C(0xFFFFF),
	},
	{
	/* invalid 4-byte sequence (second byte missing)
	- * [ 11110011 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3 },
	- .len = 1,
	+ * [ 11110011 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3 },
	+ .len = 1,
	.exp_len = 4,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (second byte malformed)
	- * [ 11110011 01111111 10111111 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
	- .len = 4,
	+ * [ 11110011 01111111 10111111 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF, 0xBF },
	+ .len = 4,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	- /* invalid 4-byte sequence (short string 1, second byte malfor…
	- * [ 11110011 011111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F },
	- .len = 2,
	+ /* invalid 4-byte sequence (short string 1, second byte
	+ * malformed) [ 11110011 011111111 ] -> INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F },
	+ .len = 2,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	- /* invalid 4-byte sequence (short string 2, second byte malfor…
	- * [ 11110011 011111111 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
	- .len = 3,
	+ /* invalid 4-byte sequence (short string 2, second byte
	+ * malformed) [ 11110011 011111111 10111111 ] -> INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF },
	+ .len = 3,
	.exp_len = 1,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},

	{
	/* invalid 4-byte sequence (third byte missing)
	- * [ 11110011 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF },
	- .len = 2,
	+ * [ 11110011 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF },
	+ .len = 2,
	.exp_len = 4,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (third byte malformed)
	- * [ 11110011 10111111 01111111 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
	- .len = 4,
	+ * [ 11110011 10111111 01111111 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F, 0xBF },
	+ .len = 4,
	.exp_len = 2,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (short string, third byte malformed)
	- * [ 11110011 10111111 01111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
	- .len = 3,
	+ * [ 11110011 10111111 01111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F },
	+ .len = 3,
	.exp_len = 2,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (fourth byte missing)
	- * [ 11110011 10111111 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
	- .len = 3,
	+ * [ 11110011 10111111 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF },
	+ .len = 3,
	.exp_len = 4,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (fourth byte malformed)
	- * [ 11110011 10111111 10111111 01111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
	- .len = 4,
	+ * [ 11110011 10111111 10111111 01111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0x7F },
	+ .len = 4,
	.exp_len = 3,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (overlong encoded)
	- * [ 11110000 10000000 10000001 10111111 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
	- .len = 4,
	+ * [ 11110000 10000000 10000001 10111111 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF0, 0x80, 0x81, 0xBF },
	+ .len = 4,
	.exp_len = 4,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	{
	/* invalid 4-byte sequence (UTF-16-unrepresentable)
	- * [ 11110100 10010000 10000000 10000000 ] ->
	- * INVALID
	- */
	- .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
	- .len = 4,
	+ * [ 11110100 10010000 10000000 10000000 ] ->
	+ * INVALID
	+ */
	+ .arr = (char *)(unsigned char[]) { 0xF4, 0x90, 0x80, 0x80 },
	+ .len = 4,
	.exp_len = 4,
	- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
	},
	};

	@@ -298,12 +296,12 @@ main(int argc, char *argv[])
	size_t len;
	uint_least32_t cp;

	- len = grapheme_decode_utf8(dec_test[i].arr,
	- dec_test[i].len, &cp);
	+ len = grapheme_decode_utf8(dec_test[i].arr, dec_test[i].len,
	+ &cp);

	- if (len != dec_test[i].exp_len \|\|
	- cp != dec_test[i].exp_cp) {
	- fprintf(stderr, "%s: Failed test %zu: "
	+ if (len != dec_test[i].exp_len \|\| cp != dec_test[i].exp_cp) {
	+ fprintf(stderr,
	+ "%s: Failed test %zu: "
	"Expected (%zx,%u), but got (%zx,%u).\n",
	argv[0], i, dec_test[i].exp_len,
	dec_test[i].exp_cp, len, cp);
	diff --git a/test/utf8-encode.c b/test/utf8-encode.c
	@@ -8,44 +8,44 @@
	#include "util.h"

	static const struct {
	- uint_least32_t cp; /* input codepoint */
	- char *exp_arr; /* expected UTF-8 byte sequence */
	- size_t exp_len; /* expected length of UTF-8 sequence */
	+ uint_least32_t cp; /* input codepoint */
	+ char *exp_arr; /* expected UTF-8 byte sequence */
	+ size_t exp_len; /* expected length of UTF-8 sequence */
	} enc_test[] = {
	{
	/* invalid codepoint (UTF-16 surrogate half) */
	- .cp = UINT32_C(0xD800),
	- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
	+ .cp = UINT32_C(0xD800),
	+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
	.exp_len = 3,
	},
	{
	/* invalid codepoint (UTF-16-unrepresentable) */
	- .cp = UINT32_C(0x110000),
	- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
	+ .cp = UINT32_C(0x110000),
	+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
	.exp_len = 3,
	},
	{
	/* codepoint encoded to a 1-byte sequence */
	- .cp = 0x01,
	- .exp_arr = (char *)(unsigned char[]){ 0x01 },
	+ .cp = 0x01,
	+ .exp_arr = (char *)(unsigned char[]) { 0x01 },
	.exp_len = 1,
	},
	{
	/* codepoint encoded to a 2-byte sequence */
	- .cp = 0xFF,
	- .exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
	+ .cp = 0xFF,
	+ .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
	.exp_len = 2,
	},
	{
	/* codepoint encoded to a 3-byte sequence */
	- .cp = 0xFFF,
	- .exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
	+ .cp = 0xFFF,
	+ .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
	.exp_len = 3,
	},
	{
	/* codepoint encoded to a 4-byte sequence */
	- .cp = UINT32_C(0xFFFFF),
	- .exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
	+ .cp = UINT32_C(0xFFFFF),
	+ .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF …
	.exp_len = 4,
	},
	};
	@@ -66,11 +66,12 @@ main(int argc, char *argv[])

	if (len != enc_test[i].exp_len \|\|
	memcmp(arr, enc_test[i].exp_arr, len)) {
	- fprintf(stderr, "%s, Failed test %zu: "
	- "Expected (", argv[0], i);
	+ fprintf(stderr,
	+ "%s, Failed test %zu: "
	+ "Expected (",
	+ argv[0], i);
	for (j = 0; j < enc_test[i].exp_len; j++) {
	- fprintf(stderr, "0x%x",
	- enc_test[i].exp_arr[j]);
	+ fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]…
	if (j + 1 < enc_test[i].exp_len) {
	fprintf(stderr, " ");
	}
	diff --git a/test/util.c b/test/util.c
	@@ -5,13 +5,14 @@
	#include <stdio.h>
	#include <string.h>

	-#include "../grapheme.h"
	#include "../gen/types.h"
	+#include "../grapheme.h"
	#include "util.h"

	int
	run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
	- const struct break_test *test, size_t testlen, const char *arg…
	+ const struct break_test *test, size_t testlen,
	+ const char *argv0)
	{
	size_t i, j, off, res, failed;

	@@ -21,11 +22,14 @@ run_break_tests(size_t (next_break)(const uint_least32_t …
	res = next_break(test[i].cp + off, test[i].cplen - off…

	/* check if our resulting offset matches */
	- if (j == test[i].lenlen \|\|
	- res != test[i].len[j++]) {
	- fprintf(stderr, "%s: Failed conformance test %…
	+ if (j == test[i].lenlen \|\| res != test[i].len[j++]) {
	+ fprintf(stderr,
	+ "%s: Failed conformance test %zu "
	+ "\"%s\".\n",
	argv0, i, test[i].descr);
	- fprintf(stderr, "J=%zu: EXPECTED len %zu, got …
	+ fprintf(stderr,
	+ "J=%zu: EXPECTED len %zu, got %zu\n",
	+ j - 1, test[i].len[j - 1], res);
	failed++;
	break;
	}
	@@ -39,13 +43,15 @@ run_break_tests(size_t (next_break)(const uint_least32_t …

	int
	run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
	- const char *), const void *test, size_t testlen, const char *na…
	+ const char *),
	+ const void *test, size_t testlen, const char *name,
	const char *argv0)
	{
	size_t i, failed;

	for (i = 0, failed = 0; i < testlen; i++) {
	- failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 …
	+ failed +=
	+ (unit_test_callback(test, i, name, argv0) == 0) ? 0 : …
	}

	printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
	@@ -56,8 +62,9 @@ run_unit_tests(int (unit_test_callback)(const void , size_t…

	int
	unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
	- size_t (*next_break)(const uint_least32_t *…
	- const char *name, const char *argv0)
	+ size_t (*next_break)(const uint_least32_t *,
	+ size_t),
	+ const char *name, const char *argv0)
	{
	const struct unit_test_next_break *test = t + off;

	@@ -69,16 +76,18 @@ unit_test_callback_next_break(const struct unit_test_next_b…

	return 0;
	err:
	- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
	- "(returned %zu instead of %zu).\n", argv0,
	- name, off, test->description, ret, test->output.ret);
	+ fprintf(stderr,
	+ "%s: %s: Failed unit test %zu \"%s\" "
	+ "(returned %zu instead of %zu).\n",
	+ argv0, name, off, test->description, ret, test->output.ret);
	return 1;
	}

	int
	unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
	size_t off,
	- size_t (*next_break_utf8)(const char *, siz…
	+ size_t (*next_break_utf8)(const char *,
	+ size_t),
	const char *name, const char *argv0)
	{
	const struct unit_test_next_break_utf8 *test = t + off;
	@@ -91,8 +100,9 @@ unit_test_callback_next_break_utf8(const struct unit_test_ne…

	return 0;
	err:
	- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
	- "(returned %zu instead of %zu).\n", argv0,
	- name, off, test->description, ret, test->output.ret);
	+ fprintf(stderr,
	+ "%s: %s: Failed unit test %zu \"%s\" "
	+ "(returned %zu instead of %zu).\n",
	+ argv0, name, off, test->description, ret, test->output.ret);
	return 1;
	}
	diff --git a/test/util.h b/test/util.h
	@@ -6,16 +6,18 @@
	#include "../grapheme.h"

	#undef MIN
	-#define MIN(x,y) ((x) < (y) ? (x) : (y))
	+#define MIN(x, y) ((x) < (y) ? (x) : (y))
	#undef LEN
	#define LEN(x) (sizeof(x) / sizeof(*(x)))

	struct unit_test_next_break {
	const char *description;
	+
	struct {
	const uint_least32_t *src;
	size_t srclen;
	} input;
	+
	struct {
	size_t ret;
	} output;
	@@ -23,10 +25,12 @@ struct unit_test_next_break {

	struct unit_test_next_break_utf8 {
	const char *description;
	+
	struct {
	const char *src;
	size_t srclen;
	} input;
	+
	struct {
	size_t ret;
	} output;
	@@ -36,14 +40,17 @@ int run_break_tests(size_t (*next_break)(const uint_least32…
	const struct break_test *test, size_t testlen,
	const char *);
	int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char …
	- const char *), const void *, size_t, const char *, const ch…
	+ const char *),
	+ const void *, size_t, const char *, const char *);

	int unit_test_callback_next_break(const struct unit_test_next_break *, size_t,
	- size_t (*next_break)(const uint_least32_t *,…
	+ size_t (*next_break)(const uint_least32_t *,
	+ size_t),
	const char *, const char *);
	int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 …
	size_t,
	- size_t (*next_break_utf8)(const char *,…
	+ size_t (*next_break_utf8)(const char *,
	+ size_t),
	const char *, const char *);

	#endif /* UTIL_H */
	diff --git a/test/word.c b/test/word.c
	@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_word_bre…
	};

	static int
	-unit_test_callback_next_word_break(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+unit_test_callback_next_word_break(const void *t, size_t off, const char *name,
	+ const char *argv0)
	{
	- return unit_test_callback_next_break(t, off,
	- grapheme_next_word_break,
	+ return unit_test_callback_next_break(t, off, grapheme_next_word_break,
	name, argv0);
	}

	static int
	unit_test_callback_next_word_break_utf8(const void *t, size_t off,
	- const char *name,
	- const char *argv0)
	+ const char *name, const char *argv0)
	{
	- return unit_test_callback_next_break_utf8(t, off,
	- grapheme_next_word_break_utf…
	- name, argv0);
	+ return unit_test_callback_next_break_utf8(
	+ t, off, grapheme_next_word_break_utf8, name, argv0);
	}

	int