Refactor line-functions with Herodotus - libgrapheme - unicode string library | |
git clone git://git.suckless.org/libgrapheme | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit a4d42053f13e8471ee3903522f964fc0a1d3161a | |
parent 65785f699be45dd77bdcbfc1d3aded39151f3205 | |
Author: Laslo Hunhold <[email protected]> | |
Date: Sat, 24 Sep 2022 12:26:19 +0200 | |
Refactor line-functions with Herodotus | |
Signed-off-by: Laslo Hunhold <[email protected]> | |
Diffstat: | |
M src/line.c | 59 ++++++++++++-----------------… | |
1 file changed, 23 insertions(+), 36 deletions(-) | |
--- | |
diff --git a/src/line.c b/src/line.c | |
@@ -18,22 +18,15 @@ get_break_prop(uint_least32_t cp) | |
} | |
static size_t | |
-next_line_break(const void *str, size_t len, size_t (*get_codepoint) | |
- (const void *, size_t, size_t, uint_least32_t *)) | |
+next_line_break(HERODOTUS_READER *r) | |
{ | |
+ HERODOTUS_READER tmp; | |
enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop, | |
last_non_sp_prop, last_non_sp_cm_or_zwj_prop; | |
- enum line_break_property res; | |
uint_least32_t cp; | |
uint_least8_t lb25_level = 0; | |
- size_t off, new_off; | |
bool lb21a_flag = false, ri_even = true; | |
- /* check degenerate cases */ | |
- if (str == NULL || len == 0) { | |
- return 0; | |
- } | |
- | |
/* | |
* Apply line breaking algorithm (UAX #14), see | |
* https://unicode.org/reports/tr14/#Algorithm and tailoring | |
@@ -47,28 +40,14 @@ next_line_break(const void *str, size_t len, size_t (*get_c… | |
* Initialize the different properties such that we have | |
* a good state after the state-update in the loop | |
*/ | |
- cp0_prop = NUM_LINE_BREAK_PROPS; | |
- if ((off = get_codepoint(str, len, 0, &cp)) >= len) { | |
- /* | |
- * A line is at least one codepoint long, so we can | |
- * safely return here | |
- */ | |
- return len; | |
- } | |
- cp1_prop = get_break_prop(cp); | |
last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */ | |
last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS; | |
- for (; off < len; off = new_off) { | |
- /* update state */ | |
- cp0_prop = cp1_prop; | |
- if ((new_off = off + get_codepoint(str, len, off, &cp)) <= len… | |
- get_codepoint(str, len, off, &cp); | |
- cp1_prop = get_break_prop(cp); | |
- } else { | |
- /* LB3 */ | |
- break; | |
- } | |
+ for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop… | |
+ herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCE… | |
+ herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) { | |
+ /* get property of the right codepoint */ | |
+ cp1_prop = get_break_prop(cp); | |
/* update retention-states */ | |
@@ -380,14 +359,14 @@ next_line_break(const void *str, size_t len, size_t (*get… | |
* two adjacent codepoints as we have it with | |
* characters. | |
*/ | |
- if (new_off < len && | |
+ herodotus_reader_copy(r, &tmp); | |
+ herodotus_read_codepoint(&tmp, true, &cp); | |
+ if (herodotus_read_codepoint(&tmp, true, &cp) == | |
+ HERODOTUS_STATUS_SUCCESS && | |
(cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || | |
cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF || | |
cp1_prop == LINE_BREAK_PROP_HY)) { | |
- get_codepoint(str, len, new_off, &cp); | |
- res = get_break_prop(cp); | |
- | |
- if (res == LINE_BREAK_PROP_NU) { | |
+ if (get_break_prop(cp) == LINE_BREAK_PROP_NU) { | |
continue; | |
} | |
} | |
@@ -507,17 +486,25 @@ next_line_break(const void *str, size_t len, size_t (*get… | |
break; | |
} | |
- return off; | |
+ return herodotus_reader_number_read(r); | |
} | |
size_t | |
grapheme_next_line_break(const uint_least32_t *str, size_t len) | |
{ | |
- return next_line_break(str, len, get_codepoint); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, str, len); | |
+ | |
+ return next_line_break(&r); | |
} | |
size_t | |
grapheme_next_line_break_utf8(const char *str, size_t len) | |
{ | |
- return next_line_break(str, len, get_codepoint_utf8); | |
+ HERODOTUS_READER r; | |
+ | |
+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, str, len); | |
+ | |
+ return next_line_break(&r); | |
} |