GopherProxy

	Refactor character-functions with Herodotus - libgrapheme - unicode string libr…
	git clone git://git.suckless.org/libgrapheme
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 65785f699be45dd77bdcbfc1d3aded39151f3205
	parent b13acfd6cd5114fcddbffaf9855664a95f966403
	Author: Laslo Hunhold <[email protected]>
	Date: Sat, 24 Sep 2022 11:45:20 +0200

	Refactor character-functions with Herodotus

	This also unifies the code and drops a lot of complicated state
	handling.

	Signed-off-by: Laslo Hunhold <[email protected]>

	Diffstat:
	M src/character.c | 60 ++++++++++-------------------…
	M src/util.c | 6 +++++-

	2 files changed, 24 insertions(+), 42 deletions(-)
	---
	diff --git a/src/character.c b/src/character.c
	@@ -175,61 +175,39 @@ grapheme_is_character_break(uint_least32_t cp0, uint_leas…
	return !notbreak;
	}

	-size_t
	-grapheme_next_character_break(const uint_least32_t *str, size_t len)
	+static size_t
	+next_character_break(HERODOTUS_READER *r)
	{
	GRAPHEME_STATE state = { 0 };
	- size_t off;
	-
	- if (str == NULL || len == 0) {
	- return 0;
	- }
	+ uint_least32_t cp0 = 0, cp1 = 0;

	- for (off = 1; off < len; off++) {
	- if (grapheme_is_character_break(str[off - 1], str[off], &state…
	+ for (herodotus_read_codepoint(r, true, &cp0);
	+ herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCC…
	+ herodotus_read_codepoint(r, true, &cp0)) {
	+ if (grapheme_is_character_break(cp0, cp1, &state)) {
	break;
	}
	}

	- return off;
	+ return herodotus_reader_number_read(r);
	}

	size_t
	-grapheme_next_character_break_utf8(const char *str, size_t len)
	+grapheme_next_character_break(const uint_least32_t *str, size_t len)
	{
	- GRAPHEME_STATE state = { 0 };
	- uint_least32_t cp0 = 0, cp1 = 0;
	- size_t off, ret;
	-
	- if (str == NULL || len == 0) {
	- return 0;
	- }
	+ HERODOTUS_READER r;

	- for (off = 0; (len == SIZE_MAX) || off < len; off += ret) {
	- cp0 = cp1;
	- ret = grapheme_decode_utf8(str + off, (len == SIZE_MAX) ?
	- SIZE_MAX : len - off, &cp1);
	+ herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, str, len);

	- if (len != SIZE_MAX && ret > (len - off)) {
	- /* string ended abruptly, simply accept cropping */
	- ret = len - off;
	- }
	+ return next_character_break(&r);
	+}

	- if (len == SIZE_MAX && cp1 == 0) {
	- /* we hit a NUL-byte and are done */
	- break;
	- }
	+size_t
	+grapheme_next_character_break_utf8(const char *str, size_t len)
	+{
	+ HERODOTUS_READER r;

	- if (off == 0) {
	- /*
	- * we skip the first round, as we need both
	- * cp0 and cp1 to be initialized
	- */
	- continue;
	- } else if (grapheme_is_character_break(cp0, cp1, &state)) {
	- break;
	- }
	- }
	+ herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, str, len);

	- return off;
	+ return next_character_break(&r);
	}
	diff --git a/src/util.c b/src/util.c
	@@ -111,7 +111,11 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance…
	}

	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
	- cp = ((const uint_least32_t *)(r->src))[r->off++];
	+ cp = ((const uint_least32_t *)(r->src))[r->off];
	+
	+ if (advance) {
	+ r->off++;
	+ }
	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
	ret = grapheme_decode_utf8((const char *)r->src + r->off,
	MIN(r->srclen, r->soft_limit[0]) -