Add unit tests for all segmentation functions - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit a815be4b5de7f7df2da664049fdb04874d37016a | |
parent 5ea8d87a9a0fb9c6dda827cc55d43c637cd4086d | |
Author: Laslo Hunhold <[email protected]> | |
Date: Mon, 3 Oct 2022 21:18:52 +0200 | |
Add unit tests for all segmentation functions | |
Now all functions in the library are covered by exhaustive unit tests | |
which supplement the already present conformance tests to make sure | |
that the thin layer between API and implementation is also working as | |
expected. | |
At this point I would assess that libgrapheme is a stable foundation | |
for real-world use, and preparations for the release of version 2 | |
can now get underway. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M test/character.c | 113 +++++++++++++++++++++++++++++… | |
M test/line.c | 112 +++++++++++++++++++++++++++++… | |
M test/sentence.c | 112 +++++++++++++++++++++++++++++… | |
M test/utf8-decode.c | 2 +- | |
M test/utf8-encode.c | 2 +- | |
M test/util.c | 47 +++++++++++++++++++++++++++++… | |
M test/util.h | 34 +++++++++++++++++++++++++++++… | |
M test/word.c | 112 +++++++++++++++++++++++++++++… | |
8 files changed, 523 insertions(+), 11 deletions(-) | |
--- | |
diff --git a/test/character.c b/test/character.c | |
@@ -6,12 +6,121 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
+static const struct unit_test_next_break next_character_break[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one character", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = 3, | |
+ }, | |
+ .output = { 2 }, | |
+ }, | |
+ { | |
+ .description = "one character, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 2 }, | |
+ }, | |
+}; | |
+ | |
+static const struct unit_test_next_break_utf8 next_character_break_utf8[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, NUL-terminated", | |
+ .input = { "", SIZE_MAX }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one character", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA*", 9 }, | |
+ .output = { 8 }, | |
+ }, | |
+ { | |
+ .description = "one character, fragment", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 }, | |
+ .output = { 4 }, | |
+ }, | |
+ { | |
+ .description = "one character, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA", SIZE_MAX }, | |
+ .output = { 8 }, | |
+ }, | |
+ { | |
+ .description = "one character, fragment, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX }, | |
+ .output = { 4 }, | |
+ }, | |
+}; | |
+ | |
+static int | |
+unit_test_callback_next_character_break(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break(t, off, | |
+ grapheme_next_character_break, | |
+ name, argv0); | |
+} | |
+ | |
+static int | |
+unit_test_callback_next_character_break_utf8(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break_utf8(t, off, | |
+ grapheme_next_character_break_utf8, | |
+ name, argv0); | |
+} | |
+ | |
int | |
main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
return run_break_tests(grapheme_next_character_break, | |
- character_break_test, | |
- LEN(character_break_test), argv[0]); | |
+ character_break_test, LEN(character_break_test), argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_character_break, | |
+ next_character_break, LEN(next_character_break), | |
+ "grapheme_next_character_break", argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_character_break_utf8, | |
+ next_character_break_utf8, LEN(next_character_break_utf8), | |
+ "grapheme_next_character_break_utf8", argv[0]); | |
} | |
diff --git a/test/line.c b/test/line.c | |
@@ -6,6 +6,110 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
+static const struct unit_test_next_break next_line_break[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one opportunity", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = 4, | |
+ }, | |
+ .output = { 3 }, | |
+ }, | |
+ { | |
+ .description = "one opportunity, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 3 }, | |
+ }, | |
+}; | |
+ | |
+static const struct unit_test_next_break_utf8 next_line_break_utf8[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, NUL-terminated", | |
+ .input = { "", SIZE_MAX }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one opportunity", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA *", 10 }, | |
+ .output = { 9 }, | |
+ }, | |
+ { | |
+ .description = "one opportunity, fragment", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 }, | |
+ .output = { 4 }, | |
+ }, | |
+ { | |
+ .description = "one opportunity, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA A", SIZE_MAX }, | |
+ .output = { 9 }, | |
+ }, | |
+ { | |
+ .description = "one opportunity, fragment, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX }, | |
+ .output = { 4 }, | |
+ }, | |
+}; | |
+ | |
+static int | |
+unit_test_callback_next_line_break(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break(t, off, | |
+ grapheme_next_line_break, | |
+ name, argv0); | |
+} | |
+ | |
+static int | |
+unit_test_callback_next_line_break_utf8(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break_utf8(t, off, | |
+ grapheme_next_line_break_utf8, | |
+ name, argv0); | |
+} | |
+ | |
int | |
main(int argc, char *argv[]) | |
{ | |
@@ -13,5 +117,11 @@ main(int argc, char *argv[]) | |
return run_break_tests(grapheme_next_line_break, | |
line_break_test, LEN(line_break_test), | |
- argv[0]); | |
+ argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_line_break, | |
+ next_line_break, LEN(next_line_break), | |
+ "grapheme_next_line_break", argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_line_break_utf8, | |
+ next_line_break_utf8, LEN(next_line_break_utf8), | |
+ "grapheme_next_line_break_utf8", argv[0]); | |
} | |
diff --git a/test/sentence.c b/test/sentence.c | |
@@ -6,6 +6,110 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
+static const struct unit_test_next_break next_sentence_break[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one sentence", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = 5, | |
+ }, | |
+ .output = { 4 }, | |
+ }, | |
+ { | |
+ .description = "one sentence, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 4 }, | |
+ }, | |
+}; | |
+ | |
+static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, NUL-terminated", | |
+ .input = { "", SIZE_MAX }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one sentence", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Ge… | |
+ .output = { 34 }, | |
+ }, | |
+ { | |
+ .description = "one sentence, fragment", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 }, | |
+ .output = { 4 }, | |
+ }, | |
+ { | |
+ .description = "one sentence, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Ge… | |
+ .output = { 34 }, | |
+ }, | |
+ { | |
+ .description = "one sentence, fragment, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX }, | |
+ .output = { 6 }, | |
+ }, | |
+}; | |
+ | |
+static int | |
+unit_test_callback_next_sentence_break(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break(t, off, | |
+ grapheme_next_sentence_break, | |
+ name, argv0); | |
+} | |
+ | |
+static int | |
+unit_test_callback_next_sentence_break_utf8(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break_utf8(t, off, | |
+ grapheme_next_sentence_break_utf8, | |
+ name, argv0); | |
+} | |
+ | |
int | |
main(int argc, char *argv[]) | |
{ | |
@@ -13,5 +117,11 @@ main(int argc, char *argv[]) | |
return run_break_tests(grapheme_next_sentence_break, | |
sentence_break_test, | |
- LEN(sentence_break_test), argv[0]); | |
+ LEN(sentence_break_test), argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_sentence_break, | |
+ next_sentence_break, LEN(next_sentence_break), | |
+ "grapheme_next_sentence_break", argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_sentence_break_utf8, | |
+ next_sentence_break_utf8, LEN(next_sentence_break_utf8), | |
+ "grapheme_next_sentence_break_utf8", argv[0]); | |
} | |
diff --git a/test/utf8-decode.c b/test/utf8-decode.c | |
@@ -310,7 +310,7 @@ main(int argc, char *argv[]) | |
failed++; | |
} | |
} | |
- printf("%s: %zu/%zu tests passed.\n", argv[0], | |
+ printf("%s: %zu/%zu unit tests passed.\n", argv[0], | |
LEN(dec_test) - failed, LEN(dec_test)); | |
return (failed > 0) ? 1 : 0; | |
diff --git a/test/utf8-encode.c b/test/utf8-encode.c | |
@@ -86,7 +86,7 @@ main(int argc, char *argv[]) | |
failed++; | |
} | |
} | |
- printf("%s: %zu/%zu tests passed.\n", argv[0], | |
+ printf("%s: %zu/%zu unit tests passed.\n", argv[0], | |
LEN(enc_test) - failed, LEN(enc_test)); | |
return (failed > 0) ? 1 : 0; | |
diff --git a/test/util.c b/test/util.c | |
@@ -38,8 +38,8 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, … | |
} | |
int | |
-run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *, | |
- const char *), void *test, size_t testlen, const char *name, | |
+run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *, | |
+ const char *), const void *test, size_t testlen, const char *name, | |
const char *argv0) | |
{ | |
size_t i, failed; | |
@@ -53,3 +53,46 @@ run_unit_tests(int (*unit_test_callback)(void *, size_t, con… | |
return (failed > 0) ? 1 : 0; | |
} | |
+ | |
+int | |
+unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off, | |
+ size_t (*next_break)(const uint_least32_t *, size_t), | |
+ const char *name, const char *argv0) | |
+{ | |
+ const struct unit_test_next_break *test = t + off; | |
+ | |
+ size_t ret = next_break(test->input.src, test->input.srclen); | |
+ | |
+ if (ret != test->output.ret) { | |
+ goto err; | |
+ } | |
+ | |
+ return 0; | |
+err: | |
+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned %zu instead of %zu).\n", argv0, | |
+ name, off, test->description, ret, test->output.ret); | |
+ return 1; | |
+} | |
+ | |
+int | |
+unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t, | |
+ size_t off, | |
+ size_t (*next_break_utf8)(const char *, size_t), | |
+ const char *name, const char *argv0) | |
+{ | |
+ const struct unit_test_next_break_utf8 *test = t + off; | |
+ | |
+ size_t ret = next_break_utf8(test->input.src, test->input.srclen); | |
+ | |
+ if (ret != test->output.ret) { | |
+ goto err; | |
+ } | |
+ | |
+ return 0; | |
+err: | |
+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned %zu instead of %zu).\n", argv0, | |
+ name, off, test->description, ret, test->output.ret); | |
+ return 1; | |
+} | |
diff --git a/test/util.h b/test/util.h | |
@@ -10,10 +10,40 @@ | |
#undef LEN | |
#define LEN(x) (sizeof(x) / sizeof(*(x))) | |
+struct unit_test_next_break { | |
+ const char *description; | |
+ struct { | |
+ const uint_least32_t *src; | |
+ size_t srclen; | |
+ } input; | |
+ struct { | |
+ size_t ret; | |
+ } output; | |
+}; | |
+ | |
+struct unit_test_next_break_utf8 { | |
+ const char *description; | |
+ struct { | |
+ const char *src; | |
+ size_t srclen; | |
+ } input; | |
+ struct { | |
+ size_t ret; | |
+ } output; | |
+}; | |
+ | |
int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t), | |
const struct break_test *test, size_t testlen, | |
const char *); | |
-int run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *, | |
- const char *), void *, size_t, const char *, const char *); | |
+int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *, | |
+ const char *), const void *, size_t, const char *, const char *); | |
+ | |
+int unit_test_callback_next_break(const struct unit_test_next_break *, size_t, | |
+ size_t (*next_break)(const uint_least32_t *, size_t), | |
+ const char *, const char *); | |
+int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *, | |
+ size_t, | |
+ size_t (*next_break_utf8)(const char *, size_t), | |
+ const char *, const char *); | |
#endif /* UTIL_H */ | |
diff --git a/test/word.c b/test/word.c | |
@@ -6,11 +6,121 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
+static const struct unit_test_next_break next_word_break[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one word", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = 4, | |
+ }, | |
+ .output = { 2 }, | |
+ }, | |
+ { | |
+ .description = "one word, null-terminated", | |
+ .input = { | |
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E… | |
+ .srclen = SIZE_MAX, | |
+ }, | |
+ .output = { 2 }, | |
+ }, | |
+}; | |
+ | |
+static const struct unit_test_next_break_utf8 next_word_break_utf8[] = { | |
+ { | |
+ .description = "NULL input", | |
+ .input = { | |
+ .src = NULL, | |
+ .srclen = 0, | |
+ }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "empty input, NUL-terminated", | |
+ .input = { "", SIZE_MAX }, | |
+ .output = { 0 }, | |
+ }, | |
+ { | |
+ .description = "one word", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", 11 }, | |
+ .output = { 8 }, | |
+ }, | |
+ { | |
+ .description = "one word, fragment", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 }, | |
+ .output = { 4 }, | |
+ }, | |
+ { | |
+ .description = "one word, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", SIZE_MAX }, | |
+ .output = { 8 }, | |
+ }, | |
+ { | |
+ .description = "one word, fragment, NUL-terminated", | |
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX }, | |
+ .output = { 4 }, | |
+ }, | |
+}; | |
+ | |
+static int | |
+unit_test_callback_next_word_break(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break(t, off, | |
+ grapheme_next_word_break, | |
+ name, argv0); | |
+} | |
+ | |
+static int | |
+unit_test_callback_next_word_break_utf8(const void *t, size_t off, | |
+ const char *name, | |
+ const char *argv0) | |
+{ | |
+ return unit_test_callback_next_break_utf8(t, off, | |
+ grapheme_next_word_break_utf8, | |
+ name, argv0); | |
+} | |
+ | |
int | |
main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
return run_break_tests(grapheme_next_word_break, word_break_test, | |
- LEN(word_break_test), argv[0]); | |
+ LEN(word_break_test), argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_word_break, | |
+ next_word_break, LEN(next_word_break), | |
+ "grapheme_next_word_break", argv[0]) + | |
+ run_unit_tests(unit_test_callback_next_word_break_utf8, | |
+ next_word_break_utf8, LEN(next_word_break_utf8), | |
+ "grapheme_next_word_break_utf8", argv[0]); | |
} |