Refactor character-functions with Herodotus - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 65785f699be45dd77bdcbfc1d3aded39151f3205 | |
parent b13acfd6cd5114fcddbffaf9855664a95f966403 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Sat, 24 Sep 2022 11:45:20 +0200 | |
Refactor character-functions with Herodotus | |
This also unifies the code and drops a lot of complicated state | |
handling. | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M src/character.c | 60 ++++++++++-------------------… | |
M src/util.c | 6 +++++- | |
2 files changed, 24 insertions(+), 42 deletions(-) | |
--- | |
diff --git a/src/character.c b/src/character.c | |
@@ -175,61 +175,39 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas… | |
return !notbreak; | |
} | |
-size_t | |
-grapheme_next_character_break(const uint_least32_t *str, size_t len) | |
+static size_t | |
+next_character_break(HERODOTUS_READER *r) | |
{ | |
GRAPHEME_STATE state = { 0 }; | |
- size_t off; | |
- | |
- if (str == NULL || len == 0) { | |
- return 0; | |
- } | |
+ uint_least32_t cp0 = 0, cp1 = 0; | |
- for (off = 1; off < len; off++) { | |
- if (grapheme_is_character_break(str[off - 1], str[off], &state)) { | |
+ for (herodotus_read_codepoint(r, true, &cp0); | |
+ herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCCESS; | |
+ herodotus_read_codepoint(r, true, &cp0)) { | |
+ if (grapheme_is_character_break(cp0, cp1, &state)) { | |
break; | |
} | |
} | |
- return off; | |
+ return herodotus_reader_number_read(r); | |
} | |
size_t | |
-grapheme_next_character_break_utf8(const char *str, size_t len) | |
+grapheme_next_character_break(const uint_least32_t *str, size_t len) | |
{ | |
- GRAPHEME_STATE state = { 0 }; | |
- uint_least32_t cp0 = 0, cp1 = 0; | |
- size_t off, ret; | |
- | |
- if (str == NULL || len == 0) { | |
- return 0; | |
- } | |
+ HERODOTUS_READER r; | |
- for (off = 0; (len == SIZE_MAX) || off < len; off += ret) { | |
- cp0 = cp1; | |
- ret = grapheme_decode_utf8(str + off, (len == SIZE_MAX) ? | |
- SIZE_MAX : len - off, &cp1); | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, str, len); | |
- if (len != SIZE_MAX && ret > (len - off)) { | |
- /* string ended abruptly, simply accept cropping */ | |
- ret = len - off; | |
- } | |
+ return next_character_break(&r); | |
+} | |
- if (len == SIZE_MAX && cp1 == 0) { | |
- /* we hit a NUL-byte and are done */ | |
- break; | |
- } | |
+size_t | |
+grapheme_next_character_break_utf8(const char *str, size_t len) | |
+{ | |
+ HERODOTUS_READER r; | |
- if (off == 0) { | |
- /* | |
- * we skip the first round, as we need both | |
- * cp0 and cp1 to be initialized | |
- */ | |
- continue; | |
- } else if (grapheme_is_character_break(cp0, cp1, &state)) { | |
- break; | |
- } | |
- } | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, str, len); | |
- return off; | |
+ return next_character_break(&r); | |
} | |
diff --git a/src/util.c b/src/util.c | |
@@ -111,7 +111,11 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance… | |
} | |
if (r->type == HERODOTUS_TYPE_CODEPOINT) { | |
- *cp = ((const uint_least32_t *)(r->src))[r->off++]; | |
+ *cp = ((const uint_least32_t *)(r->src))[r->off]; | |
+ | |
+ if (advance) { | |
+ r->off++; | |
+ } | |
} else { /* r->type == HERODOTUS_TYPE_UTF8 */ | |
ret = grapheme_decode_utf8((const char *)r->src + r->off, | |
MIN(r->srclen, r->soft_limit[0]) - |