Introduction
Introduction Statistics Contact Development Disclaimer Help
Refactor character-functions with Herodotus - libgrapheme - unicode string library
git clone git://git.suckless.org/libgrapheme
Log
Files
Refs
README
LICENSE
---
commit 65785f699be45dd77bdcbfc1d3aded39151f3205
parent b13acfd6cd5114fcddbffaf9855664a95f966403
Author: Laslo Hunhold <[email protected]>
Date: Sat, 24 Sep 2022 11:45:20 +0200
Refactor character-functions with Herodotus
This also unifies the code and drops a lot of complicated state
handling.
Signed-off-by: Laslo Hunhold <[email protected]>
Diffstat:
M src/character.c | 60 ++++++++++-------------------…
M src/util.c | 6 +++++-
2 files changed, 24 insertions(+), 42 deletions(-)
---
diff --git a/src/character.c b/src/character.c
@@ -175,61 +175,39 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas…
return !notbreak;
}
-size_t
-grapheme_next_character_break(const uint_least32_t *str, size_t len)
+static size_t
+next_character_break(HERODOTUS_READER *r)
{
GRAPHEME_STATE state = { 0 };
- size_t off;
-
- if (str == NULL || len == 0) {
- return 0;
- }
+ uint_least32_t cp0 = 0, cp1 = 0;
- for (off = 1; off < len; off++) {
- if (grapheme_is_character_break(str[off - 1], str[off], &state…
+ for (herodotus_read_codepoint(r, true, &cp0);
+ herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCC…
+ herodotus_read_codepoint(r, true, &cp0)) {
+ if (grapheme_is_character_break(cp0, cp1, &state)) {
break;
}
}
- return off;
+ return herodotus_reader_number_read(r);
}
size_t
-grapheme_next_character_break_utf8(const char *str, size_t len)
+grapheme_next_character_break(const uint_least32_t *str, size_t len)
{
- GRAPHEME_STATE state = { 0 };
- uint_least32_t cp0 = 0, cp1 = 0;
- size_t off, ret;
-
- if (str == NULL || len == 0) {
- return 0;
- }
+ HERODOTUS_READER r;
- for (off = 0; (len == SIZE_MAX) || off < len; off += ret) {
- cp0 = cp1;
- ret = grapheme_decode_utf8(str + off, (len == SIZE_MAX) ?
- SIZE_MAX : len - off, &cp1);
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, str, len);
- if (len != SIZE_MAX && ret > (len - off)) {
- /* string ended abruptly, simply accept cropping */
- ret = len - off;
- }
+ return next_character_break(&r);
+}
- if (len == SIZE_MAX && cp1 == 0) {
- /* we hit a NUL-byte and are done */
- break;
- }
+size_t
+grapheme_next_character_break_utf8(const char *str, size_t len)
+{
+ HERODOTUS_READER r;
- if (off == 0) {
- /*
- * we skip the first round, as we need both
- * cp0 and cp1 to be initialized
- */
- continue;
- } else if (grapheme_is_character_break(cp0, cp1, &state)) {
- break;
- }
- }
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, str, len);
- return off;
+ return next_character_break(&r);
}
diff --git a/src/util.c b/src/util.c
@@ -111,7 +111,11 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance…
}
if (r->type == HERODOTUS_TYPE_CODEPOINT) {
- *cp = ((const uint_least32_t *)(r->src))[r->off++];
+ *cp = ((const uint_least32_t *)(r->src))[r->off];
+
+ if (advance) {
+ r->off++;
+ }
} else { /* r->type == HERODOTUS_TYPE_UTF8 */
ret = grapheme_decode_utf8((const char *)r->src + r->off,
MIN(r->srclen, r->soft_limit[0]) -
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.