Refactor case-checking-functions with Herodotus and add unit tests - libgrapheme | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 5dec22a7143e1105f25c7a7626fa166d882367d0 | |
parent 8a7e2ee85f0a2824e48e85e57534c5b18113cf07 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Sat, 24 Sep 2022 10:36:15 +0200 | |
Refactor case-checking-functions with Herodotus and add unit tests | |
Additionally, expand the unit tests with special-casing-cases. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M src/case.c | 213 +++++++++++++++--------------… | |
M src/util.h | 3 ++- | |
M test/case.c | 312 +++++++++++++++++++++++++++--… | |
3 files changed, 382 insertions(+), 146 deletions(-) | |
--- | |
diff --git a/src/case.c b/src/case.c | |
@@ -1,4 +1,5 @@ | |
/* See LICENSE file for copyright and license details. */ | |
+#include <stddef.h> | |
#include <stdint.h> | |
#include "../grapheme.h" | |
@@ -208,6 +209,7 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) | |
/* cast the rest of the codepoints in the word to lowercase */ | |
to_case(r, w, 1, lower_major, lower_minor, lower_special); | |
+ /* remove the limit on the word before the next iteration */ | |
herodotus_reader_pop_limit(r); | |
} | |
@@ -289,20 +291,16 @@ grapheme_to_titlecase_utf8(const char *src, size_t srclen… | |
} | |
static inline bool | |
-is_case(const void *src, size_t srclen, | |
- size_t srcnumprocess, | |
- size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *… | |
- const uint_least16_t *major, const int_least32_t *minor, | |
- const struct special_case *sc, size_t *output) | |
+is_case(HERODOTUS_READER *r, const uint_least16_t *major, | |
+ const int_least32_t *minor, const struct special_case *sc, | |
+ size_t *output) | |
{ | |
- size_t srcoff, new_srcoff, tmp, res, off, i; | |
- uint_least32_t cp, tmp_cp; | |
+ size_t off, i; | |
+ bool ret = true; | |
+ uint_least32_t cp; | |
int_least32_t map; | |
- for (srcoff = 0; srcoff < srcnumprocess; srcoff = new_srcoff) { | |
- /* read in next source codepoint */ | |
- new_srcoff = srcoff + get_codepoint(src, srclen, srcoff, &cp); | |
- | |
+ for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUC… | |
/* get and handle case mapping */ | |
if (unlikely((map = get_case_offset(cp, major, minor)) >= | |
INT32_C(0x110000))) { | |
@@ -310,173 +308,162 @@ is_case(const void *src, size_t srclen, | |
* is the difference to 0x110000*/ | |
off = (uint_least32_t)map - UINT32_C(0x110000); | |
- for (i = 0, tmp = srcoff; i < sc[off].cplen; i++, tmp … | |
- res = get_codepoint(src, srclen, srcoff, &tmp_… | |
- if (tmp_cp != sc[off].cp[i]) { | |
- /* we have a difference */ | |
- if (output) { | |
- *output = tmp; | |
+ for (i = 0; i < sc[off].cplen; i++) { | |
+ if (herodotus_read_codepoint(r, false, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS) { | |
+ if (cp != sc[off].cp[i]) { | |
+ ret = false; | |
+ goto done; | |
+ } else { | |
+ /* move forward */ | |
+ herodotus_read_codepoint(r, tr… | |
} | |
- return false; | |
+ } else { | |
+ /* | |
+ * input ended and we didn't see | |
+ * any difference so far, so this | |
+ * string is in fact okay | |
+ */ | |
+ ret = true; | |
+ goto done; | |
} | |
} | |
- new_srcoff = tmp; | |
} else { | |
/* we have a simple mapping */ | |
if (cp != (uint_least32_t)((int_least32_t)cp + map)) { | |
/* we have a difference */ | |
- if (output) { | |
- *output = srcoff; | |
- } | |
- return false; | |
+ ret = false; | |
+ goto done; | |
+ } else { | |
+ /* move forward */ | |
+ herodotus_read_codepoint(r, true, &cp); | |
} | |
} | |
} | |
- | |
+done: | |
if (output) { | |
- *output = srcoff; | |
+ *output = herodotus_reader_number_read(r); | |
} | |
- return true; | |
+ return ret; | |
} | |
static inline bool | |
-is_titlecase(const void *src, size_t srclen, | |
- size_t (*get_codepoint)(const void *, size_t, size_t, uint_least3… | |
- size_t *output) | |
+is_titlecase(HERODOTUS_READER *r, size_t *output) | |
{ | |
enum case_property prop; | |
- size_t next_wb, srcoff, res, tmp_output; | |
+ enum herodotus_status s; | |
+ bool ret = true; | |
uint_least32_t cp; | |
- for (srcoff = 0; ; ) { | |
- if (get_codepoint == get_codepoint_utf8) { | |
- if ((next_wb = grapheme_next_word_break_utf8((const ch… | |
- srclen - … | |
- /* we consumed all of the string */ | |
- break; | |
- } | |
- } else { | |
- if ((next_wb = grapheme_next_word_break((const uint_le… | |
- srclen - srcof… | |
- /* we consumed all of the string */ | |
- break; | |
- } | |
- } | |
- | |
- for (; next_wb > 0 && srcoff < srclen; next_wb -= res, srcoff … | |
+ for (;;) { | |
+ herodotus_reader_push_advance_limit(r, herodotus_next_word_bre… | |
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODO… | |
/* check if we have a cased character */ | |
- res = get_codepoint(src, srclen, srcoff, &cp); | |
prop = get_case_property(cp); | |
if (prop == CASE_PROP_CASED || | |
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) { | |
break; | |
- } | |
- } | |
- | |
- if (next_wb > 0) { | |
- /* get character length */ | |
- res = get_codepoint(src, srclen, srcoff, &cp); | |
- | |
- /* we have a cased character at srcoff, check if it's … | |
- if (get_codepoint == get_codepoint_utf8) { | |
- if (!is_case((const char *)src + srcoff, | |
- srclen - srcoff, res, | |
- get_codepoint_utf8, title_major, | |
- title_minor, title_special, &tmp… | |
- if (output) { | |
- *output = srcoff + tmp_output; | |
- } | |
- return false; | |
- } | |
} else { | |
- if (!is_case((const uint_least32_t *)src + src… | |
- srclen - srcoff, res, | |
- get_codepoint, title_major, | |
- title_minor, title_special, &tmp… | |
- if (output) { | |
- *output = srcoff + tmp_output; | |
- } | |
- return false; | |
- } | |
+ /* increment reader */ | |
+ herodotus_read_codepoint(r, true, &cp); | |
} | |
+ } | |
+ if (s == HERODOTUS_STATUS_END_OF_BUFFER) { | |
+ /* we are done */ | |
+ break; | |
+ } else if (s == HERODOTUS_STATUS_SOFT_LIMIT_REACHED) { | |
+ /* | |
+ * we did not encounter any cased character | |
+ * up to the word break | |
+ */ | |
+ continue; | |
+ } else { | |
/* | |
- * we consumed a character (make sure to never | |
- * underflow next_wb; this should not happen, | |
- * but it's better to be sure) | |
+ * we encountered a cased character before the word | |
+ * break, check if it's titlecase | |
*/ | |
- srcoff += res; | |
- next_wb -= (res <= next_wb) ? res : next_wb; | |
+ herodotus_reader_push_advance_limit(r, | |
+ herodotus_reader_next_codepoint_break(r)); | |
+ if (!is_case(r, title_major, title_minor, title_specia… | |
+ ret = false; | |
+ goto done; | |
+ } | |
+ herodotus_reader_pop_limit(r); | |
} | |
/* check if the rest of the codepoints in the word are lowerca… | |
- if (get_codepoint == get_codepoint_utf8) { | |
- if (!is_case((const char *)src + srcoff, | |
- srclen - srcoff, next_wb, | |
- get_codepoint_utf8, lower_major, | |
- lower_minor, lower_special, &tmp_output)… | |
- if (output) { | |
- *output = srcoff + tmp_output; | |
- } | |
- return false; | |
- } | |
- } else { | |
- if (!is_case((const uint_least32_t *)src + srcoff, | |
- srclen - srcoff, next_wb, | |
- get_codepoint, lower_major, | |
- lower_minor, lower_special, &tmp_output)… | |
- if (output) { | |
- *output = srcoff + tmp_output; | |
- } | |
- return false; | |
- } | |
+ if (!is_case(r, lower_major, lower_minor, lower_special, NULL)… | |
+ ret = false; | |
+ goto done; | |
} | |
- srcoff += next_wb; | |
- } | |
+ /* remove the limit on the word before the next iteration */ | |
+ herodotus_reader_pop_limit(r); | |
+ } | |
+done: | |
if (output) { | |
- *output = srcoff; | |
+ *output = herodotus_reader_number_read(r); | |
} | |
- return true; | |
+ return ret; | |
} | |
bool | |
grapheme_is_uppercase(const uint_least32_t *src, size_t srclen, size_t *casele… | |
{ | |
- return is_case(src, srclen, srclen, get_codepoint, | |
- upper_major, upper_minor, upper_special, caselen); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen); | |
+ | |
+ return is_case(&r, upper_major, upper_minor, upper_special, caselen); | |
} | |
bool | |
grapheme_is_lowercase(const uint_least32_t *src, size_t srclen, size_t *casele… | |
{ | |
- return is_case(src, srclen, srclen, get_codepoint, | |
- lower_major, lower_minor, lower_special, caselen); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen); | |
+ | |
+ return is_case(&r, lower_major, lower_minor, lower_special, caselen); | |
} | |
bool | |
grapheme_is_titlecase(const uint_least32_t *src, size_t srclen, size_t *casele… | |
{ | |
- return is_titlecase(src, srclen, get_codepoint, caselen); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen); | |
+ | |
+ return is_titlecase(&r, caselen); | |
} | |
bool | |
grapheme_is_uppercase_utf8(const char *src, size_t srclen, size_t *caselen) | |
{ | |
- return is_case(src, srclen, srclen, get_codepoint_utf8, | |
- upper_major, upper_minor, upper_special, caselen); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen); | |
+ | |
+ return is_case(&r, upper_major, upper_minor, upper_special, caselen); | |
} | |
bool | |
grapheme_is_lowercase_utf8(const char *src, size_t srclen, size_t *caselen) | |
{ | |
- return is_case(src, srclen, srclen, get_codepoint_utf8, | |
- lower_major, lower_minor, lower_special, caselen); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen); | |
+ return is_case(&r, lower_major, lower_minor, lower_special, caselen); | |
} | |
bool | |
grapheme_is_titlecase_utf8(const char *src, size_t srclen, size_t *caselen) | |
{ | |
- return is_titlecase(src, srclen, get_codepoint_utf8, caselen); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen); | |
+ | |
+ return is_titlecase(&r, caselen); | |
} | |
diff --git a/src/util.h b/src/util.h | |
@@ -79,6 +79,7 @@ void herodotus_reader_init(HERODOTUS_READER *, enum herodotus… | |
void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *); | |
void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t); | |
void herodotus_reader_pop_limit(HERODOTUS_READER *); | |
+size_t herodotus_reader_number_read(const HERODOTUS_READER *); | |
size_t herodotus_reader_next_word_break(const HERODOTUS_READER *); | |
size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *); | |
enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_… | |
@@ -86,7 +87,7 @@ enum herodotus_status herodotus_read_codepoint(HERODOTUS_READ… | |
void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *, | |
size_t); | |
void herodotus_writer_nul_terminate(HERODOTUS_WRITER *); | |
-size_t herodotus_writer_number_written(HERODOTUS_WRITER *); | |
+size_t herodotus_writer_number_written(const HERODOTUS_WRITER *); | |
void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t); | |
size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *); | |
diff --git a/test/case.c b/test/case.c | |
@@ -7,6 +7,18 @@ | |
#include "../grapheme.h" | |
#include "util.h" | |
+struct unit_test_is_case_utf8 { | |
+ const char *description; | |
+ struct { | |
+ const char *src; | |
+ size_t srclen; | |
+ } input; | |
+ struct { | |
+ bool ret; | |
+ size_t caselen; | |
+ } output; | |
+}; | |
+ | |
struct unit_test_to_case_utf8 { | |
const char *description; | |
struct { | |
@@ -20,7 +32,201 @@ struct unit_test_to_case_utf8 { | |
} output; | |
}; | |
-static struct unit_test_to_case_utf8 lowercase_utf8[] = { | |
+static struct unit_test_is_case_utf8 is_lowercase_utf8[] = { | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { true, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, violation", | |
+ .input = { "A", 1 }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, confirmation", | |
+ .input = { "\xc3\x9f", 2 }, | |
+ .output = { true, 2 }, | |
+ }, | |
+ { | |
+ .description = "one character, violation, NUL-terminated", | |
+ .input = { "A", SIZE_MAX }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, confirmation, NUL-terminated", | |
+ .input = { "\xc3\x9f", SIZE_MAX }, | |
+ .output = { true, 2 }, | |
+ }, | |
+ { | |
+ .description = "one word, violation", | |
+ .input = { "Hello", 5 }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one word, partial confirmation", | |
+ .input = { "gru" "\xc3\x9f" "fOrmel", 11 }, | |
+ .output = { false, 6 }, | |
+ }, | |
+ { | |
+ .description = "one word, full confirmation", | |
+ .input = { "gru" "\xc3\x9f" "formel", 11 }, | |
+ .output = { true, 11 }, | |
+ }, | |
+ { | |
+ .description = "one word, violation, NUL-terminated", | |
+ .input = { "Hello", SIZE_MAX }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one word, partial confirmation, NUL-terminated… | |
+ .input = { "gru" "\xc3\x9f" "fOrmel", SIZE_MAX }, | |
+ .output = { false, 6 }, | |
+ }, | |
+ { | |
+ .description = "one word, full confirmation, NUL-terminated", | |
+ .input = { "gru" "\xc3\x9f" "formel", SIZE_MAX }, | |
+ .output = { true, 11 }, | |
+ }, | |
+}; | |
+ | |
+static struct unit_test_is_case_utf8 is_uppercase_utf8[] = { | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { true, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, violation", | |
+ .input = { "\xc3\x9f", 2 }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, confirmation", | |
+ .input = { "A", 1 }, | |
+ .output = { true, 1 }, | |
+ }, | |
+ { | |
+ .description = "one character, violation, NUL-terminated", | |
+ .input = { "\xc3\x9f", SIZE_MAX }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, confirmation, NUL-terminated", | |
+ .input = { "A", SIZE_MAX }, | |
+ .output = { true, 1 }, | |
+ }, | |
+ { | |
+ .description = "one word, violation", | |
+ .input = { "hello", 5 }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one word, partial confirmation", | |
+ .input = { "GRU" "\xc3\x9f" "formel", 11 }, | |
+ .output = { false, 3 }, | |
+ }, | |
+ { | |
+ .description = "one word, full confirmation", | |
+ .input = { "HELLO", 5 }, | |
+ .output = { true, 5 }, | |
+ }, | |
+ { | |
+ .description = "one word, violation, NUL-terminated", | |
+ .input = { "hello", SIZE_MAX }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one word, partial confirmation, NUL-terminated… | |
+ .input = { "GRU" "\xc3\x9f" "formel", SIZE_MAX }, | |
+ .output = { false, 3 }, | |
+ }, | |
+ { | |
+ .description = "one word, full confirmation, NUL-terminated", | |
+ .input = { "HELLO", SIZE_MAX }, | |
+ .output = { true, 5 }, | |
+ }, | |
+}; | |
+ | |
+static struct unit_test_is_case_utf8 is_titlecase_utf8[] = { | |
+ { | |
+ .description = "empty input", | |
+ .input = { "", 0 }, | |
+ .output = { true, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, violation", | |
+ .input = { "\xc3\x9f", 2 }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, confirmation", | |
+ .input = { "A", 1 }, | |
+ .output = { true, 1 }, | |
+ }, | |
+ { | |
+ .description = "one character, violation, NUL-terminated", | |
+ .input = { "\xc3\x9f", SIZE_MAX }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one character, confirmation, NUL-terminated", | |
+ .input = { "A", SIZE_MAX }, | |
+ .output = { true, 1 }, | |
+ }, | |
+ { | |
+ .description = "one word, violation", | |
+ .input = { "hello", 5 }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one word, partial confirmation", | |
+ .input = { "Gru" "\xc3\x9f" "fOrmel", 11 }, | |
+ .output = { false, 6 }, | |
+ }, | |
+ { | |
+ .description = "one word, full confirmation", | |
+ .input = { "Gru" "\xc3\x9f" "formel", 11 }, | |
+ .output = { true, 11 }, | |
+ }, | |
+ { | |
+ .description = "one word, violation, NUL-terminated", | |
+ .input = { "hello", SIZE_MAX }, | |
+ .output = { false, 0 }, | |
+ }, | |
+ { | |
+ .description = "one word, partial confirmation, NUL-terminated… | |
+ .input = { "Gru" "\xc3\x9f" "fOrmel", SIZE_MAX }, | |
+ .output = { false, 6 }, | |
+ }, | |
+ { | |
+ .description = "one word, full confirmation, NUL-terminated", | |
+ .input = { "Gru" "\xc3\x9f" "formel", SIZE_MAX }, | |
+ .output = { true, 11 }, | |
+ }, | |
+ { | |
+ .description = "multiple words, partial confirmation", | |
+ .input = { "Hello Gru" "\xc3\x9f" "fOrmel!", 18 }, | |
+ .output = { false, 12 }, | |
+ }, | |
+ { | |
+ .description = "multiple words, full confirmation", | |
+ .input = { "Hello Gru" "\xc3\x9f" "formel!", 18 }, | |
+ .output = { true, 18 }, | |
+ }, | |
+ { | |
+ .description = "multiple words, partial confirmation, NUL-term… | |
+ .input = { "Hello Gru" "\xc3\x9f" "fOrmel!", SIZE_MAX }, | |
+ .output = { false, 12 }, | |
+ }, | |
+ { | |
+ .description = "multiple words, full confirmation, NUL-termina… | |
+ .input = { "Hello Gru" "\xc3\x9f" "formel!", SIZE_MAX }, | |
+ .output = { true, 18 }, | |
+ }, | |
+}; | |
+ | |
+static struct unit_test_to_case_utf8 to_lowercase_utf8[] = { | |
{ | |
.description = "empty input", | |
.input = { "", 0, 10 }, | |
@@ -38,8 +244,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = { | |
}, | |
{ | |
.description = "one character, no conversion", | |
- .input = { "a", 1, 10 }, | |
- .output = { "a", 1 }, | |
+ .input = { "\xc3\x9f", 2, 10 }, | |
+ .output = { "\xc3\x9f", 2 }, | |
}, | |
{ | |
.description = "one character, conversion, truncation", | |
@@ -53,8 +259,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = { | |
}, | |
{ | |
.description = "one character, no conversion, NUL-terminated", | |
- .input = { "a", SIZE_MAX, 10 }, | |
- .output = { "a", 1 }, | |
+ .input = { "\xc3\x9f", SIZE_MAX, 10 }, | |
+ .output = { "\xc3\x9f", 2 }, | |
}, | |
{ | |
.description = "one character, conversion, NUL-terminated, tru… | |
@@ -93,7 +299,7 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = { | |
}, | |
}; | |
-static struct unit_test_to_case_utf8 uppercase_utf8[] = { | |
+static struct unit_test_to_case_utf8 to_uppercase_utf8[] = { | |
{ | |
.description = "empty input", | |
.input = { "", 0, 10 }, | |
@@ -106,8 +312,8 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = { | |
}, | |
{ | |
.description = "one character, conversion", | |
- .input = { "a", 1, 10 }, | |
- .output = { "A", 1 }, | |
+ .input = { "\xc3\x9f", 2, 10 }, | |
+ .output = { "SS", 2 }, | |
}, | |
{ | |
.description = "one character, no conversion", | |
@@ -116,13 +322,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = { | |
}, | |
{ | |
.description = "one character, conversion, truncation", | |
- .input = { "a", 1, 0 }, | |
- .output = { "", 1 }, | |
+ .input = { "\xc3\x9f", 2, 0 }, | |
+ .output = { "", 2 }, | |
}, | |
{ | |
.description = "one character, conversion, NUL-terminated", | |
- .input = { "a", SIZE_MAX, 10 }, | |
- .output = { "A", 1 }, | |
+ .input = { "\xc3\x9f", SIZE_MAX, 10 }, | |
+ .output = { "SS", 2 }, | |
}, | |
{ | |
.description = "one character, no conversion, NUL-terminated", | |
@@ -131,13 +337,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = { | |
}, | |
{ | |
.description = "one character, conversion, NUL-terminated, tru… | |
- .input = { "a", SIZE_MAX, 0 }, | |
- .output = { "", 1 }, | |
+ .input = { "\xc3\x9f", SIZE_MAX, 0 }, | |
+ .output = { "", 2 }, | |
}, | |
{ | |
.description = "one word, conversion", | |
- .input = { "wOrD", 4, 10 }, | |
- .output = { "WORD", 4 }, | |
+ .input = { "gRu" "\xc3\x9f" "fOrMel", 11, 15 }, | |
+ .output = { "GRUSSFORMEL", 11 }, | |
}, | |
{ | |
.description = "one word, no conversion", | |
@@ -146,13 +352,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = { | |
}, | |
{ | |
.description = "one word, conversion, truncation", | |
- .input = { "wOrD", 4, 3 }, | |
- .output = { "WO", 4 }, | |
+ .input = { "gRu" "\xc3\x9f" "formel", 11, 5 }, | |
+ .output = { "GRUS", 11 }, | |
}, | |
{ | |
.description = "one word, conversion, NUL-terminated", | |
- .input = { "wOrD", SIZE_MAX, 10 }, | |
- .output = { "WORD", 4 }, | |
+ .input = { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 15 }, | |
+ .output = { "GRUSSFORMEL", 11 }, | |
}, | |
{ | |
.description = "one word, no conversion, NUL-terminated", | |
@@ -161,12 +367,12 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = { | |
}, | |
{ | |
.description = "one word, conversion, NUL-terminated, truncati… | |
- .input = { "wOrD", SIZE_MAX, 3 }, | |
- .output = { "WO", 4 }, | |
+ .input = { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 5 }, | |
+ .output = { "GRUS", 11 }, | |
}, | |
}; | |
-static struct unit_test_to_case_utf8 titlecase_utf8[] = { | |
+static struct unit_test_to_case_utf8 to_titlecase_utf8[] = { | |
{ | |
.description = "empty input", | |
.input = { "", 0, 10 }, | |
@@ -270,6 +476,42 @@ static struct unit_test_to_case_utf8 titlecase_utf8[] = { | |
}; | |
static int | |
+unit_test_callback_is_case_utf8(void *t, size_t off, const char *name, const c… | |
+{ | |
+ struct unit_test_is_case_utf8 *test = (struct unit_test_is_case_utf8 *… | |
+ bool ret = false; | |
+ size_t caselen = 0x7f; | |
+ | |
+ if (t == is_lowercase_utf8) { | |
+ ret = grapheme_is_lowercase_utf8(test->input.src, test->input.… | |
+ &caselen); | |
+ } else if (t == is_uppercase_utf8) { | |
+ ret = grapheme_is_uppercase_utf8(test->input.src, test->input.… | |
+ &caselen); | |
+ } else if (t == is_titlecase_utf8) { | |
+ ret = grapheme_is_titlecase_utf8(test->input.src, test->input.… | |
+ &caselen); | |
+ | |
+ } else { | |
+ goto err; | |
+ } | |
+ | |
+ /* check results */ | |
+ if (ret != test->output.ret || caselen != test->output.caselen) { | |
+ goto err; | |
+ } | |
+ | |
+ return 0; | |
+err: | |
+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " | |
+ "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0, | |
+ name, off, test->description, ret ? "true" : "false", | |
+ caselen, test->output.ret ? "true" : "false", | |
+ test->output.caselen); | |
+ return 1; | |
+} | |
+ | |
+static int | |
unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const c… | |
{ | |
struct unit_test_to_case_utf8 *test = (struct unit_test_to_case_utf8 *… | |
@@ -279,13 +521,13 @@ unit_test_callback_to_case_utf8(void *t, size_t off, cons… | |
/* fill the array with canary values */ | |
memset(buf, 0x7f, LEN(buf)); | |
- if (t == lowercase_utf8) { | |
+ if (t == to_lowercase_utf8) { | |
ret = grapheme_to_lowercase_utf8(test->input.src, test->input.… | |
buf, test->input.destlen); | |
- } else if (t == uppercase_utf8) { | |
+ } else if (t == to_uppercase_utf8) { | |
ret = grapheme_to_uppercase_utf8(test->input.src, test->input.… | |
buf, test->input.destlen); | |
- } else if (t == titlecase_utf8) { | |
+ } else if (t == to_titlecase_utf8) { | |
ret = grapheme_to_titlecase_utf8(test->input.src, test->input.… | |
buf, test->input.destlen); | |
} else { | |
@@ -319,10 +561,16 @@ main(int argc, char *argv[]) | |
{ | |
(void)argc; | |
- return run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8, | |
- LEN(lowercase_utf8), "grapheme_to_lowercase_utf8… | |
- run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8, | |
- LEN(uppercase_utf8), "grapheme_to_uppercase_utf8… | |
- run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8, | |
- LEN(titlecase_utf8), "grapheme_to_titlecase_utf8… | |
+ return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_ut… | |
+ LEN(is_lowercase_utf8), "grapheme_is_lowercase_u… | |
+ run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_ut… | |
+ LEN(is_uppercase_utf8), "grapheme_is_uppercase_u… | |
+ run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_ut… | |
+ LEN(is_titlecase_utf8), "grapheme_is_titlecase_u… | |
+ run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_ut… | |
+ LEN(to_lowercase_utf8), "grapheme_to_lowercase_u… | |
+ run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_ut… | |
+ LEN(to_uppercase_utf8), "grapheme_to_uppercase_u… | |
+ run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_ut… | |
+ LEN(to_titlecase_utf8), "grapheme_to_titlecase_u… | |
} |