GopherProxy

	Add unit tests for all segmentation functions - libgrapheme - unicode string li…
	git clone git://git.suckless.org/libgrapheme
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit a815be4b5de7f7df2da664049fdb04874d37016a
	parent 5ea8d87a9a0fb9c6dda827cc55d43c637cd4086d
	Author: Laslo Hunhold <[email protected]>
	Date: Mon, 3 Oct 2022 21:18:52 +0200

	Add unit tests for all segmentation functions

	Now all functions in the library are covered by exhaustive unit tests
	which supplement the already present conformance tests to make sure
	that the thin layer between API and implementation is also working as
	expected.

	At this point I would assess that libgrapheme is a stable foundation
	for real-world use, and preparations for the release of version 2
	can now get underway.

	Signed-off-by: Laslo Hunhold <[email protected]>

	Diffstat:
	M test/character.c \| 113 +++++++++++++++++++++++++++++…
	M test/line.c \| 112 +++++++++++++++++++++++++++++…
	M test/sentence.c \| 112 +++++++++++++++++++++++++++++…
	M test/utf8-decode.c \| 2 +-
	M test/utf8-encode.c \| 2 +-
	M test/util.c \| 47 +++++++++++++++++++++++++++++…
	M test/util.h \| 34 +++++++++++++++++++++++++++++…
	M test/word.c \| 112 +++++++++++++++++++++++++++++…

	8 files changed, 523 insertions(+), 11 deletions(-)
	---
	diff --git a/test/character.c b/test/character.c
	@@ -6,12 +6,121 @@
	#include "../grapheme.h"
	#include "util.h"

	+static const struct unit_test_next_break next_character_break[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one character",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = 3,
	+ },
	+ .output = { 2 },
	+ },
	+ {
	+ .description = "one character, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 2 },
	+ },
	+};
	+
	+static const struct unit_test_next_break_utf8 next_character_break_utf8[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = { "", 0 },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, NUL-terminated",
	+ .input = { "", SIZE_MAX },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one character",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA*", 9 },
	+ .output = { 8 },
	+ },
	+ {
	+ .description = "one character, fragment",
	+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
	+ .output = { 4 },
	+ },
	+ {
	+ .description = "one character, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA", SIZE_MAX },
	+ .output = { 8 },
	+ },
	+ {
	+ .description = "one character, fragment, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
	+ .output = { 4 },
	+ },
	+};
	+
	+static int
	+unit_test_callback_next_character_break(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break(t, off,
	+ grapheme_next_character_break,
	+ name, argv0);
	+}
	+
	+static int
	+unit_test_callback_next_character_break_utf8(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break_utf8(t, off,
	+ grapheme_next_character_brea…
	+ name, argv0);
	+}
	+
	int
	main(int argc, char *argv[])
	{
	(void)argc;

	return run_break_tests(grapheme_next_character_break,
	- character_break_test,
	- LEN(character_break_test), argv[0]);
	+ character_break_test, LEN(character_break_test)…
	+ run_unit_tests(unit_test_callback_next_character_break,
	+ next_character_break, LEN(next_character_break),
	+ "grapheme_next_character_break", argv[0]) +
	+ run_unit_tests(unit_test_callback_next_character_break_utf8,
	+ next_character_break_utf8, LEN(next_character_br…
	+ "grapheme_next_character_break_utf8", argv[0]);
	}
	diff --git a/test/line.c b/test/line.c
	@@ -6,6 +6,110 @@
	#include "../grapheme.h"
	#include "util.h"

	+static const struct unit_test_next_break next_line_break[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one opportunity",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = 4,
	+ },
	+ .output = { 3 },
	+ },
	+ {
	+ .description = "one opportunity, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 3 },
	+ },
	+};
	+
	+static const struct unit_test_next_break_utf8 next_line_break_utf8[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = { "", 0 },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, NUL-terminated",
	+ .input = { "", SIZE_MAX },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one opportunity",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA *", 10 },
	+ .output = { 9 },
	+ },
	+ {
	+ .description = "one opportunity, fragment",
	+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
	+ .output = { 4 },
	+ },
	+ {
	+ .description = "one opportunity, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA A", SIZE_MAX },
	+ .output = { 9 },
	+ },
	+ {
	+ .description = "one opportunity, fragment, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
	+ .output = { 4 },
	+ },
	+};
	+
	+static int
	+unit_test_callback_next_line_break(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break(t, off,
	+ grapheme_next_line_break,
	+ name, argv0);
	+}
	+
	+static int
	+unit_test_callback_next_line_break_utf8(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break_utf8(t, off,
	+ grapheme_next_line_break_utf…
	+ name, argv0);
	+}
	+
	int
	main(int argc, char *argv[])
	{
	@@ -13,5 +117,11 @@ main(int argc, char *argv[])

	return run_break_tests(grapheme_next_line_break,
	line_break_test, LEN(line_break_test),
	- argv[0]);
	+ argv[0]) +
	+ run_unit_tests(unit_test_callback_next_line_break,
	+ next_line_break, LEN(next_line_break),
	+ "grapheme_next_line_break", argv[0]) +
	+ run_unit_tests(unit_test_callback_next_line_break_utf8,
	+ next_line_break_utf8, LEN(next_line_break_utf8),
	+ "grapheme_next_line_break_utf8", argv[0]);
	}
	diff --git a/test/sentence.c b/test/sentence.c
	@@ -6,6 +6,110 @@
	#include "../grapheme.h"
	#include "util.h"

	+static const struct unit_test_next_break next_sentence_break[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one sentence",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = 5,
	+ },
	+ .output = { 4 },
	+ },
	+ {
	+ .description = "one sentence, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 4 },
	+ },
	+};
	+
	+static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = { "", 0 },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, NUL-terminated",
	+ .input = { "", SIZE_MAX },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one sentence",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Ge…
	+ .output = { 34 },
	+ },
	+ {
	+ .description = "one sentence, fragment",
	+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
	+ .output = { 4 },
	+ },
	+ {
	+ .description = "one sentence, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Ge…
	+ .output = { 34 },
	+ },
	+ {
	+ .description = "one sentence, fragment, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
	+ .output = { 6 },
	+ },
	+};
	+
	+static int
	+unit_test_callback_next_sentence_break(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break(t, off,
	+ grapheme_next_sentence_break,
	+ name, argv0);
	+}
	+
	+static int
	+unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break_utf8(t, off,
	+ grapheme_next_sentence_break…
	+ name, argv0);
	+}
	+
	int
	main(int argc, char *argv[])
	{
	@@ -13,5 +117,11 @@ main(int argc, char *argv[])

	return run_break_tests(grapheme_next_sentence_break,
	sentence_break_test,
	- LEN(sentence_break_test), argv[0]);
	+ LEN(sentence_break_test), argv[0]) +
	+ run_unit_tests(unit_test_callback_next_sentence_break,
	+ next_sentence_break, LEN(next_sentence_break),
	+ "grapheme_next_sentence_break", argv[0]) +
	+ run_unit_tests(unit_test_callback_next_sentence_break_utf8,
	+ next_sentence_break_utf8, LEN(next_sentence_brea…
	+ "grapheme_next_sentence_break_utf8", argv[0]);
	}
	diff --git a/test/utf8-decode.c b/test/utf8-decode.c
	@@ -310,7 +310,7 @@ main(int argc, char *argv[])
	failed++;
	}
	}
	- printf("%s: %zu/%zu tests passed.\n", argv[0],
	+ printf("%s: %zu/%zu unit tests passed.\n", argv[0],
	LEN(dec_test) - failed, LEN(dec_test));

	return (failed > 0) ? 1 : 0;
	diff --git a/test/utf8-encode.c b/test/utf8-encode.c
	@@ -86,7 +86,7 @@ main(int argc, char *argv[])
	failed++;
	}
	}
	- printf("%s: %zu/%zu tests passed.\n", argv[0],
	+ printf("%s: %zu/%zu unit tests passed.\n", argv[0],
	LEN(enc_test) - failed, LEN(enc_test));

	return (failed > 0) ? 1 : 0;
	diff --git a/test/util.c b/test/util.c
	@@ -38,8 +38,8 @@ run_break_tests(size_t (next_break)(const uint_least32_t , …
	}

	int
	-run_unit_tests(int (unit_test_callback)(void , size_t, const char *,
	- const char ), void test, size_t testlen, const char *name,
	+run_unit_tests(int (unit_test_callback)(const void , size_t, const char *,
	+ const char ), const void test, size_t testlen, const char *na…
	const char *argv0)
	{
	size_t i, failed;
	@@ -53,3 +53,46 @@ run_unit_tests(int (unit_test_callback)(void , size_t, con…

	return (failed > 0) ? 1 : 0;
	}
	+
	+int
	+unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
	+ size_t (next_break)(const uint_least32_t …
	+ const char name, const char argv0)
	+{
	+ const struct unit_test_next_break *test = t + off;
	+
	+ size_t ret = next_break(test->input.src, test->input.srclen);
	+
	+ if (ret != test->output.ret) {
	+ goto err;
	+ }
	+
	+ return 0;
	+err:
	+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
	+ "(returned %zu instead of %zu).\n", argv0,
	+ name, off, test->description, ret, test->output.ret);
	+ return 1;
	+}
	+
	+int
	+unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
	+ size_t off,
	+ size_t (next_break_utf8)(const char , siz…
	+ const char name, const char argv0)
	+{
	+ const struct unit_test_next_break_utf8 *test = t + off;
	+
	+ size_t ret = next_break_utf8(test->input.src, test->input.srclen);
	+
	+ if (ret != test->output.ret) {
	+ goto err;
	+ }
	+
	+ return 0;
	+err:
	+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
	+ "(returned %zu instead of %zu).\n", argv0,
	+ name, off, test->description, ret, test->output.ret);
	+ return 1;
	+}
	diff --git a/test/util.h b/test/util.h
	@@ -10,10 +10,40 @@
	#undef LEN
	#define LEN(x) (sizeof(x) / sizeof(*(x)))

	+struct unit_test_next_break {
	+ const char *description;
	+ struct {
	+ const uint_least32_t *src;
	+ size_t srclen;
	+ } input;
	+ struct {
	+ size_t ret;
	+ } output;
	+};
	+
	+struct unit_test_next_break_utf8 {
	+ const char *description;
	+ struct {
	+ const char *src;
	+ size_t srclen;
	+ } input;
	+ struct {
	+ size_t ret;
	+ } output;
	+};
	+
	int run_break_tests(size_t (next_break)(const uint_least32_t , size_t),
	const struct break_test *test, size_t testlen,
	const char *);
	-int run_unit_tests(int (unit_test_callback)(void , size_t, const char *,
	- const char ), void , size_t, const char , const char );
	+int run_unit_tests(int (unit_test_callback)(const void , size_t, const char …
	+ const char ), const void , size_t, const char *, const ch…
	+
	+int unit_test_callback_next_break(const struct unit_test_next_break *, size_t,
	+ size_t (next_break)(const uint_least32_t ,…
	+ const char , const char );
	+int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 …
	+ size_t,
	+ size_t (next_break_utf8)(const char ,…
	+ const char , const char );

	#endif /* UTIL_H */
	diff --git a/test/word.c b/test/word.c
	@@ -6,11 +6,121 @@
	#include "../grapheme.h"
	#include "util.h"

	+static const struct unit_test_next_break next_word_break[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one word",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = 4,
	+ },
	+ .output = { 2 },
	+ },
	+ {
	+ .description = "one word, null-terminated",
	+ .input = {
	+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E…
	+ .srclen = SIZE_MAX,
	+ },
	+ .output = { 2 },
	+ },
	+};
	+
	+static const struct unit_test_next_break_utf8 next_word_break_utf8[] = {
	+ {
	+ .description = "NULL input",
	+ .input = {
	+ .src = NULL,
	+ .srclen = 0,
	+ },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input",
	+ .input = { "", 0 },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "empty input, NUL-terminated",
	+ .input = { "", SIZE_MAX },
	+ .output = { 0 },
	+ },
	+ {
	+ .description = "one word",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", 11 },
	+ .output = { 8 },
	+ },
	+ {
	+ .description = "one word, fragment",
	+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
	+ .output = { 4 },
	+ },
	+ {
	+ .description = "one word, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", SIZE_MAX },
	+ .output = { 8 },
	+ },
	+ {
	+ .description = "one word, fragment, NUL-terminated",
	+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
	+ .output = { 4 },
	+ },
	+};
	+
	+static int
	+unit_test_callback_next_word_break(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break(t, off,
	+ grapheme_next_word_break,
	+ name, argv0);
	+}
	+
	+static int
	+unit_test_callback_next_word_break_utf8(const void *t, size_t off,
	+ const char *name,
	+ const char *argv0)
	+{
	+ return unit_test_callback_next_break_utf8(t, off,
	+ grapheme_next_word_break_utf…
	+ name, argv0);
	+}
	+
	int
	main(int argc, char *argv[])
	{
	(void)argc;

	return run_break_tests(grapheme_next_word_break, word_break_test,
	- LEN(word_break_test), argv[0]);
	+ LEN(word_break_test), argv[0]) +
	+ run_unit_tests(unit_test_callback_next_word_break,
	+ next_word_break, LEN(next_word_break),
	+ "grapheme_next_word_break", argv[0]) +
	+ run_unit_tests(unit_test_callback_next_word_break_utf8,
	+ next_word_break_utf8, LEN(next_word_break_utf8),
	+ "grapheme_next_word_break_utf8", argv[0]);
	}